markmv
Version:
TypeScript CLI for markdown file operations with intelligent link refactoring
353 lines • 12.9 kB
JavaScript
import { constants, access } from 'node:fs/promises';
import { readFile } from 'node:fs/promises';
/**
* Validates markdown links and identifies broken or problematic references.
*
* The LinkValidator checks various types of links including internal file references, external
* URLs, and Claude import syntax. It provides comprehensive reporting of validation issues and
* supports different validation modes for different use cases.
*
* @category Core
*
* @example
* Basic link validation
* ```typescript
* const validator = new LinkValidator({
* checkExternal: true,
* strictInternal: true,
* externalTimeout: 10000
* });
*
* const result = await validator.validateFile('docs/api.md');
*
* if (!result.isValid) {
* console.log(`Found ${result.brokenLinks.length} broken links`);
* result.brokenLinks.forEach(link => {
* console.log(`- ${link.href} (line ${link.line}): ${link.reason}`);
* });
* }
* ```
*
* @example
* Batch validation
* ```typescript
* const validator = new LinkValidator();
* const files = ['docs/guide.md', 'docs/api.md', 'docs/examples.md'];
*
* const results = await validator.validateFiles(files);
* const totalBroken = results.reduce((sum, r) => sum + r.brokenLinks.length, 0);
* console.log(`Found ${totalBroken} broken links across ${files.length} files`);
* ```
*/
export class LinkValidator {
options;
constructor(options = {}) {
this.options = {
checkExternal: options.checkExternal ?? false,
externalTimeout: options.externalTimeout ?? 5000,
strictInternal: options.strictInternal ?? true,
checkClaudeImports: options.checkClaudeImports ?? true,
};
}
async validateFiles(files) {
const brokenLinks = [];
const warnings = [];
let linksChecked = 0;
for (const file of files) {
const fileErrors = await this.validateFile(file);
brokenLinks.push(...fileErrors);
linksChecked += file.links.length;
}
return {
valid: brokenLinks.length === 0,
filesChecked: files.length,
linksChecked,
brokenLinks,
warnings,
};
}
async validateFile(file) {
const brokenLinks = [];
for (const link of file.links) {
const broken = await this.validateLink(link, file.filePath);
if (broken) {
brokenLinks.push(broken);
}
}
return brokenLinks;
}
async validateLink(link, sourceFile) {
try {
switch (link.type) {
case 'internal':
return await this.validateInternalLink(link, sourceFile);
case 'claude-import':
return this.options.checkClaudeImports
? await this.validateClaudeImportLink(link, sourceFile)
: null;
case 'external':
return this.options.checkExternal
? await this.validateExternalLink(link, sourceFile)
: null;
case 'anchor':
return await this.validateAnchorLink(link, sourceFile);
case 'image':
return await this.validateImageLink(link, sourceFile);
case 'reference':
// Reference links are validated if they resolve to an internal/external link
return null;
default:
return null;
}
}
catch (error) {
return {
sourceFile,
link,
reason: 'invalid-format',
details: error instanceof Error ? error.message : String(error),
};
}
}
async validateInternalLink(link, sourceFile) {
if (!link.resolvedPath) {
return {
sourceFile,
link,
reason: 'invalid-format',
details: 'Could not resolve internal link path',
};
}
try {
await access(link.resolvedPath, constants.F_OK);
return null; // Link is valid
}
catch {
if (this.options.strictInternal) {
return {
sourceFile,
link,
reason: 'file-not-found',
details: `File does not exist: ${link.resolvedPath}`,
};
}
return null; // Not strict, so ignore missing files
}
}
async validateClaudeImportLink(link, sourceFile) {
if (!link.resolvedPath) {
return {
sourceFile,
link,
reason: 'invalid-format',
details: 'Could not resolve Claude import path',
};
}
// Claude imports should point to existing files
try {
await access(link.resolvedPath, constants.F_OK);
return null; // Import is valid
}
catch {
return {
sourceFile,
link,
reason: 'file-not-found',
details: `Claude import file does not exist: ${link.resolvedPath}`,
};
}
}
async validateExternalLink(link, sourceFile) {
try {
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), this.options.externalTimeout);
const response = await fetch(link.href, {
method: 'HEAD',
signal: controller.signal,
});
clearTimeout(timeoutId);
if (!response.ok) {
return {
sourceFile,
link,
reason: 'external-error',
details: `HTTP ${response.status}: ${response.statusText}`,
};
}
return null; // Link is valid
}
catch (error) {
return {
sourceFile,
link,
reason: 'external-error',
details: error instanceof Error ? error.message : String(error),
};
}
}
async validateImageLink(link, sourceFile) {
// For external images, use external validation if enabled
if (link.href.startsWith('http')) {
return this.options.checkExternal ? await this.validateExternalLink(link, sourceFile) : null;
}
// For internal images, check if file exists
if (!link.resolvedPath) {
return {
sourceFile,
link,
reason: 'invalid-format',
details: 'Could not resolve image path',
};
}
try {
await access(link.resolvedPath, constants.F_OK);
return null; // Image exists
}
catch {
return {
sourceFile,
link,
reason: 'file-not-found',
details: `Image file does not exist: ${link.resolvedPath}`,
};
}
}
async validateLinkIntegrity(files) {
const validationResult = await this.validateFiles(files);
const circularReferences = await this.checkCircularReferences(files);
const warnings = [...validationResult.warnings];
if (circularReferences.length > 0) {
warnings.push(`Found ${circularReferences.length} circular reference(s)`);
}
return {
valid: validationResult.valid && circularReferences.length === 0,
circularReferences,
brokenLinks: validationResult.brokenLinks,
warnings,
};
}
/**
* Validates a specific array of links from a single file.
*
* @param links - Array of links to validate
* @param sourceFile - Path to the source file containing the links
*
* @returns Promise resolving to validation result with broken links
*/
async validateLinks(links, sourceFile) {
const brokenLinks = [];
for (const link of links) {
const broken = await this.validateLink(link, sourceFile);
if (broken) {
brokenLinks.push(broken);
}
}
return { brokenLinks };
}
async checkCircularReferences(files) {
// Check if we have ParsedMarkdownFile[] (test case) or string[] (normal case)
if (files.length > 0 && typeof files[0] === 'object' && 'filePath' in files[0]) {
// ParsedMarkdownFile[] case - check for circular dependencies
const parsedFiles = files.filter((f) => typeof f === 'object' && f !== null && 'filePath' in f);
const visited = new Set();
const recursionStack = new Set();
const cycles = [];
const detectCycle = (filePath, path) => {
if (recursionStack.has(filePath)) {
// Found a cycle - extract the cycle from the path
const cycleStart = path.indexOf(filePath);
const cycle = path.slice(cycleStart).concat(filePath);
cycles.push(cycle);
return;
}
if (visited.has(filePath)) {
return;
}
visited.add(filePath);
recursionStack.add(filePath);
// Find the file and check its dependencies
const file = parsedFiles.find((f) => f.filePath === filePath);
if (file && file.dependencies) {
for (const dependency of file.dependencies) {
detectCycle(dependency, [...path, filePath]);
}
}
recursionStack.delete(filePath);
};
// Check each file for cycles
for (const file of parsedFiles) {
if (!visited.has(file.filePath)) {
detectCycle(file.filePath, []);
}
}
return cycles;
}
else {
// string[] case - return basic implementation
return {
hasCircularReferences: false,
};
}
}
/**
* Validates anchor links by checking if the target heading exists in the file.
*
* @param link - The anchor link to validate
* @param sourceFile - Path to the file containing the link
*
* @returns Promise resolving to BrokenLink if invalid, null if valid
*/
async validateAnchorLink(link, sourceFile) {
try {
// Extract the anchor from the href (remove the #)
const anchor = link.href.substring(1);
if (!anchor) {
return {
sourceFile,
link,
reason: 'invalid-format',
details: 'Empty anchor reference',
};
}
// Read the source file to check for the heading
const content = await readFile(sourceFile, 'utf-8');
// Convert anchor to the format used in markdown headings
// GitHub-style anchor generation: lowercase, replace spaces with hyphens, remove special chars
const normalizedAnchor = anchor
.toLowerCase()
.replace(/\s+/g, '-')
.replace(/[^\w-]/g, '');
// Look for headings in the file
const headingRegex = /^#+\s+(.+)$/gm;
let match;
const headings = [];
while ((match = headingRegex.exec(content)) !== null) {
const heading = match[1];
const normalizedHeading = heading
.toLowerCase()
.replace(/\s+/g, '-')
.replace(/[^\w-]/g, '');
headings.push(normalizedHeading);
// Check if this heading matches our anchor
if (normalizedHeading === normalizedAnchor) {
return null; // Anchor is valid
}
}
return {
sourceFile,
link,
reason: 'file-not-found',
details: `Anchor "${anchor}" not found. Available headings: ${headings.join(', ')}`,
};
}
catch (error) {
return {
sourceFile,
link,
reason: 'invalid-format',
details: `Error validating anchor: ${error instanceof Error ? error.message : String(error)}`,
};
}
}
}
//# sourceMappingURL=link-validator.js.map