/*
 * @mintlify/link-rot
 * Version:
 * Static checking for broken internal links
 * 307 lines (306 loc) • 11.2 kB
 * JavaScript
 */
import { getFileListWithDirectories } from '@mintlify/prebuild';
import { existsSync } from 'fs';
import isAbsoluteUrl from 'is-absolute-url';
import { parse, join, resolve, relative, dirname, basename } from 'path';
import { sep as WINDOWS_SEPARATOR } from 'path/win32';
import { getLinkPaths, normalizePath } from './prebuild.js';
/**
 * Classification assigned to a link target by MdxPath.getPathType.
 * Module-private: this enum is not exported.
 */
const PathType = {
  INTERNAL: 'internal',
  EXTERNAL: 'external',
  DATA: 'data',
};
/**
 * String constants naming the kind of construct a path was wrapped in.
 * An MdxPath stores one of these as its `wrapper` field.
 */
export const Wrapper = {
  MD: 'md',
  SRC: 'src',
  HREF: 'href',
  CARD: 'card',
};
/**
 * Kinds of edge a Graph can hold. Graph.addEdge defaults to CONTENT.
 */
export const EdgeType = {
  CONTENT: 'content',
  NAVIGATION: 'navigation',
};
/**
 * An MdxPath is a path in an Mdx page. Contains all path-related information.
 *
 * Fields set by the constructor:
 * - originalPath: the raw link text as it was passed in
 * - relativeDir:  directory of the containing page, after normalizePath()
 * - filename:     file name of the containing page
 * - wrapper:      stored as given (presumably a Wrapper value; not inspected here)
 * - pathType:     one of the PathType values
 * - path:         path.parse() result for internal paths, a URL object otherwise
 * - anchorLink:   the "#fragment" part (including "#"), or undefined if empty
 * - queryParams:  URLSearchParams for the query string
 */
export class MdxPath {
    /**
     * Classify a raw link target.
     *
     * @param {string} url - the raw link text
     * @returns {string} PathType.DATA, PathType.EXTERNAL or PathType.INTERNAL
     */
    static getPathType(url) {
        // `data:` has to be tested before isAbsoluteUrl: a data URI carries a
        // scheme, so an "is it absolute?" check that runs first would claim it
        // and the DATA classification could never be reached.
        if (url.startsWith('data:')) {
            return PathType.DATA;
        }
        if (isAbsoluteUrl(url)) {
            return PathType.EXTERNAL;
        }
        return PathType.INTERNAL;
    }
    /**
     * Generate all the computed fields in an MdxPath
     *
     * @returns {{path: object, pathType: string, anchorLink: (string|undefined), queryParams: URLSearchParams}}
     * @throws {Error} when a non-internal path cannot be parsed as a URL; the
     *   underlying parse error is attached as `cause`.
     */
    constructParsedPath() {
        const pathType = MdxPath.getPathType(this.originalPath);
        if (pathType !== PathType.INTERNAL) {
            let url;
            try {
                url = new URL(this.originalPath);
            }
            catch (e) {
                throw new Error(`Failed to parse path as URL: ${this.originalPath}. If this is a purposefully malformed path, please wrap it in backticks, i.e \`${this.originalPath}\` `, { cause: e });
            }
            return {
                path: url,
                pathType,
                anchorLink: url.hash || undefined,
                queryParams: url.searchParams,
            };
        }
        let normalizedPath = this.originalPath.replaceAll(WINDOWS_SEPARATOR, '/');
        if (normalizedPath.startsWith('.')) {
            // also include paths that start with "..", etc.
            // Resolve against the page's directory, then express the result
            // relative to the current working directory.
            normalizedPath = relative('.', resolve(this.relativeDir, normalizedPath));
            // TODO path starts with .. if path resolves to file system outside of mintlify project
        }
        // remove leading slash
        if (normalizedPath.startsWith('/')) {
            normalizedPath = normalizedPath.slice(1);
        }
        // add arbitrary protocol to parse as URL and isolate pathname, hash and query
        const urlPath = new URL(`mintlify:${normalizedPath}`);
        normalizedPath = urlPath.pathname;
        // remove trailing slash
        if (normalizedPath.endsWith('/')) {
            normalizedPath = normalizedPath.slice(0, -1);
        }
        // if path is "", then path points to same page
        if (normalizedPath === '') {
            normalizedPath = join(this.relativeDir, this.filename);
        }
        return {
            path: parse(normalizedPath),
            pathType,
            anchorLink: urlPath.hash || undefined,
            queryParams: urlPath.searchParams,
        };
    }
    /**
     * @param {string} originalPath - raw link text found in the page
     * @param {string} [relativeDir=''] - directory of the page containing the link
     * @param {string} [filename=''] - file name of the page containing the link
     * @param {string} [wrapper] - stored as-is on the instance
     * @throws {Error} see constructParsedPath
     */
    constructor(originalPath, relativeDir = '', filename = '', wrapper) {
        this.originalPath = originalPath;
        this.relativeDir = normalizePath(relativeDir);
        this.filename = filename;
        this.wrapper = wrapper;
        const { path, pathType, anchorLink, queryParams } = this.constructParsedPath();
        this.path = path;
        this.pathType = pathType;
        this.anchorLink = anchorLink;
        this.queryParams = queryParams;
    }
    /**
     * @returns {string} for internal paths, the normalized dir/base (without
     *   hash or query); otherwise the string form of the parsed URL.
     */
    toString() {
        if (this.pathType === PathType.INTERNAL) {
            const { dir, base } = this.path;
            return join(dir, base);
        }
        return this.path.toString();
    }
    /**
     * List the files this internal path could refer to.
     *
     * @param {Iterable<string>} allFiles - candidate file paths; a single
     *   leading "/" on each is ignored
     * @param {string} baseDir - directory against which the path is checked on disk
     * @returns {string[]} unique matches in discovery order; always empty for
     *   non-internal paths
     */
    getResolvedFiles(allFiles, baseDir) {
        if (this.pathType !== PathType.INTERNAL) {
            return [];
        }
        const parsedPath = this.path;
        // A Set keeps insertion order and drops repeats: a link without an
        // extension satisfies both name checks below, and an on-disk hit can
        // also appear in allFiles.
        const resolvedFiles = new Set();
        if (existsSync(resolve(baseDir, this.toString()))) {
            resolvedFiles.add(join(parsedPath.dir, parsedPath.base));
        }
        // there may be multiple files/folders corresponding to the node
        for (const file of allFiles) {
            const fileWithoutLeadingSlash = file.startsWith('/') ? file.slice(1) : file;
            const parsedFilePath = parse(fileWithoutLeadingSlash);
            if (parsedFilePath.dir !== parsedPath.dir) {
                continue;
            }
            const sameName = parsedFilePath.name === parsedPath.name;
            // account for edge case when there is another . in the filename
            const baseIsFileName = parsedFilePath.name === parsedPath.base;
            if (sameName || baseIsFileName) {
                resolvedFiles.add(fileWithoutLeadingSlash);
            }
        }
        return [...resolvedFiles];
    }
    /**
     * Whether this internal path points at the given node.
     *
     * @param {{toString(): string}} node - candidate target
     * @param {Iterable<string>} allFiles - forwarded to getResolvedFiles
     * @param {string} baseDir - forwarded to getResolvedFiles
     * @returns {boolean} true when the node's string form equals this path's
     *   string form or one of its resolved files
     */
    resolvesTo(node, allFiles, baseDir) {
        if (this.pathType !== PathType.INTERNAL) {
            return false;
        }
        const nodeLabel = node.toString();
        if (this.toString() === nodeLabel) {
            return true;
        }
        return this.getResolvedFiles(allFiles, baseDir).includes(nodeLabel);
    }
    /**
     * Parse each raw path, keep the internal ones, and return their
     * normalized string forms.
     *
     * @param {string[]} paths - raw paths
     * @returns {string[]}
     */
    static filterMapInternalPaths(paths) {
        return paths
            .map((path) => new MdxPath(path))
            .filter(({ pathType }) => pathType === PathType.INTERNAL)
            .map((path) => path.toString());
    }
}
/*
 * A node is a representation of a page
 * One node can contain multiple MdxPaths (corresponding to internal/external links in the page)
 *
 * Fields:
 * - label:       join(relativeDir, filename); this is what toString() returns
 * - relativeDir: normalizePath(dirname(label)) of the label passed in
 * - filename:    basename of the label passed in
 * - paths:       MdxPath instances added via addPath (or supplied to the constructor)
 * - edges:       outgoing edges registered via addOutgoingEdge
 */
export class Node {
    /**
     * @param {string} label - path-like identifier of the page
     * @param {Array} [paths=[]] - initial list of paths; used as-is, not copied
     */
    constructor(label, paths = []) {
        const relativeDir = normalizePath(dirname(label));
        const filename = basename(label);
        // The stored label is rebuilt from its normalized parts rather than
        // kept verbatim.
        this.label = join(relativeDir, filename);
        this.paths = paths;
        this.edges = [];
        this.relativeDir = relativeDir;
        this.filename = filename;
    }
    /** @returns {string} the node's label */
    toString() {
        return this.label;
    }
    /**
     * @param {{label: string}} other
     * @returns {boolean} true when both labels are identical
     */
    equals(other) {
        return this.label === other.label;
    }
    /**
     * Record a link found in this page.
     *
     * @param {string} originalPath - raw link text
     * @param {string} [wrapper] - passed through to MdxPath
     */
    addPath(originalPath, wrapper) {
        const path = new MdxPath(originalPath, this.relativeDir, this.filename, wrapper);
        this.paths.push(path);
    }
    /**
     * Register an outgoing edge.
     *
     * - An edge that is not yet known is appended.
     * - A different edge object that equals a stored one is merged by
     *   incrementing the stored edge's count.
     * - Passing the very instance that is already stored is a no-op: whoever
     *   owns that instance (e.g. Graph.addEdge) has already counted the
     *   occurrence, so incrementing again would count it twice.
     *
     * @param {object} edge - object exposing equals(other) and incrementCount()
     */
    addOutgoingEdge(edge) {
        const existingEdge = this.edges.find((e) => e === edge || e.equals(edge));
        if (!existingEdge) {
            this.edges.push(edge);
            return;
        }
        if (existingEdge !== edge) {
            existingEdge.incrementCount();
        }
    }
    /**
     * Collect the targets of outgoing edges, following them up to `depth` levels.
     * Targets are not de-duplicated; a node reachable by several routes
     * appears once per route.
     *
     * @param {number} [depth=1] - how many edge hops to follow
     * @returns {Node[]} each target followed (when depth > 1) by its own descendants
     */
    getChildNodes(depth = 1) {
        const children = [];
        for (const edge of this.edges) {
            const child = edge.target;
            children.push(child);
            if (depth > 1) {
                children.push(...child.getChildNodes(depth - 1));
            }
        }
        return children;
    }
}
/**
 * A directed, typed link from one node to another, with an occurrence counter
 * that starts at 1.
 */
export class Edge {
    /**
     * @param {object} source - origin node; must expose equals(other)
     * @param {object} target - destination node; must expose equals(other)
     * @param {string} edgeType - kind of edge
     */
    constructor(source, target, edgeType) {
        this.source = source;
        this.target = target;
        this.edgeType = edgeType;
        // a freshly created edge already stands for one occurrence
        this.count = 1;
    }
    /** Record one more occurrence of this edge. */
    incrementCount() {
        this.count += 1;
    }
    /** @returns {number} number of recorded occurrences */
    getCount() {
        return this.count;
    }
    /**
     * @param {Edge} other
     * @returns {boolean} true when source, target and type all match
     */
    equals(other) {
        // direction is significant: source is only ever compared with source,
        // target with target
        const sameEndpoints = this.source.equals(other.source) && this.target.equals(other.target);
        return sameEndpoints && this.edgeType === other.edgeType;
    }
}
/**
 * A graph of pages (nodes) connected by the links between them (edges).
 *
 * Fields:
 * - baseDir:           absolute form of the directory passed to the constructor
 * - nodes:             plain object mapping the label given to addNode -> Node
 * - edges:             every distinct Edge created through addEdge
 * - fileResolutionMap: cache of MdxPath string form -> Set of resolved files
 */
export class Graph {
    /**
     * @param {string} baseDir - project directory; stored after path.resolve()
     */
    constructor(baseDir) {
        this.baseDir = resolve(baseDir);
        this.nodes = {};
        this.edges = [];
        this.fileResolutionMap = new Map();
    }
    /**
     * Return the node registered under `label`, creating it if necessary.
     *
     * @param {string} label
     * @returns {Node}
     */
    addNode(label) {
        // Own-property check: a label such as "constructor" or "toString"
        // must not be mistaken for an existing node because of
        // Object.prototype.
        if (!Object.prototype.hasOwnProperty.call(this.nodes, label)) {
            this.nodes[label] = new Node(label);
        }
        return this.nodes[label];
    }
    /**
     * @param {Iterable<string>} labels - labels to register via addNode
     */
    addNodes(labels) {
        for (const label of labels) {
            this.addNode(label);
        }
    }
    /*
     * Aliases are additional paths that are valid due to redirects
     */
    addAliasNodes() {
        // TODO move aliases computation to mint config package, since it'll depend on version
        // for now this is a hacky implementation
        const aliases = MdxPath.filterMapInternalPaths(getFileListWithDirectories(this.baseDir));
        this.addNodes(aliases);
    }
    /**
     * @param {string} label
     * @returns {Node|undefined} the node registered under `label`, if any
     */
    getNode(label) {
        return Object.prototype.hasOwnProperty.call(this.nodes, label) ? this.nodes[label] : undefined;
    }
    /**
     * Register one occurrence of an edge from `source` to `target`.
     * If an equal edge already exists its count is incremented and that
     * existing edge is returned; otherwise a new edge (count 1) is stored.
     *
     * @param {Node} source
     * @param {Node} target
     * @param {string} [edgeType=EdgeType.CONTENT]
     * @returns {Edge} the stored edge
     */
    addEdge(source, target, edgeType = EdgeType.CONTENT) {
        const candidate = new Edge(source, target, edgeType);
        const existingEdge = this.edges.find((edge) => edge.equals(candidate));
        if (existingEdge) {
            existingEdge.incrementCount();
            return existingEdge;
        }
        this.edges.push(candidate);
        return candidate;
    }
    /**
     * For every internal path of every node, find the first node it points
     * to and connect the two with a content edge.
     */
    addEdgesBetweenNodes() {
        const allFiles = getLinkPaths(this.baseDir);
        const nodes = Object.values(this.nodes);
        for (const node of nodes) {
            for (const path of node.paths) {
                if (path.pathType !== PathType.INTERNAL) {
                    continue;
                }
                // Resolve the path once, instead of once per candidate node.
                // The match rule is the same as MdxPath.resolvesTo: the
                // node's string form equals the path's string form or one of
                // its resolved files.
                const pathLabel = path.toString();
                const resolvedFiles = new Set(path.getResolvedFiles(allFiles, this.baseDir));
                const targetNode = nodes.find((otherNode) => {
                    const otherLabel = otherNode.toString();
                    return otherLabel === pathLabel || resolvedFiles.has(otherLabel);
                });
                if (!targetNode) {
                    continue;
                }
                const edge = this.addEdge(node, targetNode);
                // addEdge has already counted this occurrence. Only hand the
                // edge to the node the first time, so a repeated link is not
                // counted a second time by the node.
                if (!node.edges.includes(edge)) {
                    node.addOutgoingEdge(edge);
                }
            }
        }
    }
    /**
     * Fill fileResolutionMap for every internal path currently in the graph.
     */
    precomputeFileResolutions() {
        const allFiles = getLinkPaths(this.baseDir);
        // Build resolution map for all internal paths
        for (const node of Object.values(this.nodes)) {
            for (const path of node.paths) {
                if (path.pathType !== PathType.INTERNAL) {
                    continue;
                }
                const resolvedFiles = path.getResolvedFiles(allFiles, this.baseDir);
                this.fileResolutionMap.set(path.toString(), new Set(resolvedFiles));
            }
        }
    }
    /**
     * Return every internal path none of whose resolved files is the label
     * of a node in the graph.
     *
     * @returns {MdxPath[]} broken paths, in node/path iteration order
     */
    getBrokenInternalLinks() {
        if (this.fileResolutionMap.size === 0) {
            this.precomputeFileResolutions();
        }
        const brokenLinks = [];
        const nodeSet = new Set(Object.values(this.nodes).map((node) => node.toString()));
        // File list for cache misses; only fetched if a miss actually occurs.
        let allFiles;
        for (const node of Object.values(this.nodes)) {
            for (const path of node.paths) {
                if (path.pathType !== PathType.INTERNAL) {
                    continue;
                }
                const pathKey = path.toString();
                let resolvedFiles = this.fileResolutionMap.get(pathKey);
                if (!resolvedFiles) {
                    // The cache was built before this path was added.
                    // Resolve it now rather than reporting it as broken
                    // merely because it is uncached.
                    if (allFiles === undefined) {
                        allFiles = getLinkPaths(this.baseDir);
                    }
                    resolvedFiles = new Set(path.getResolvedFiles(allFiles, this.baseDir));
                    this.fileResolutionMap.set(pathKey, resolvedFiles);
                }
                if (![...resolvedFiles].some((file) => nodeSet.has(file))) {
                    brokenLinks.push(path);
                }
            }
        }
        return brokenLinks;
    }
    // DEBUGGING
    /**
     * @returns {string[]} "original => normalized" for every internal path
     */
    getAllInternalPaths() {
        return Object.values(this.nodes)
            .flatMap((node) => node.paths)
            .filter((path) => path.pathType === PathType.INTERNAL)
            .map((path) => `${path.originalPath} => ${path.toString()}`);
    }
}