UNPKG

markmv

Version:

TypeScript CLI for markdown file operations with intelligent link refactoring

353 lines 12.9 kB
import { constants, access, readFile } from 'node:fs/promises';

/** Matches hrefs that use an explicit http:// or https:// scheme. */
const HTTP_URL_PATTERN = /^https?:\/\//i;

/**
 * Normalizes heading/anchor text the way GitHub-style anchors are generated:
 * lowercase, whitespace runs become a single hyphen, and every character that
 * is not a word character or hyphen is dropped.
 *
 * @param {string} text - Raw heading text or anchor fragment
 * @returns {string} Normalized slug
 */
function toAnchorSlug(text) {
  return text
    .toLowerCase()
    .replace(/\s+/g, '-')
    .replace(/[^\w-]/g, '');
}

/**
 * Validates markdown links and identifies broken or problematic references.
 *
 * The LinkValidator checks various types of links including internal file references, external
 * URLs, and Claude import syntax. It provides comprehensive reporting of validation issues and
 * supports different validation modes for different use cases.
 *
 * @category Core
 *
 * @example
 *   Basic link validation
 *   ```typescript
 *   const validator = new LinkValidator({
 *     checkExternal: true,
 *     strictInternal: true,
 *     externalTimeout: 10000
 *   });
 *
 *   const result = await validator.validateFiles(parsedFiles);
 *
 *   if (!result.valid) {
 *     console.log(`Found ${result.brokenLinks.length} broken links`);
 *     result.brokenLinks.forEach(broken => {
 *       console.log(`- ${broken.link.href}: ${broken.reason} (${broken.details})`);
 *     });
 *   }
 *   ```
 */
export class LinkValidator {
  options;

  /**
   * @param {object} [options] - Validation options
   * @param {boolean} [options.checkExternal=false] - Issue HEAD requests for external links
   * @param {number} [options.externalTimeout=5000] - Abort external requests after this many ms
   * @param {boolean} [options.strictInternal=true] - Report missing internal link targets
   * @param {boolean} [options.checkClaudeImports=true] - Validate Claude import links
   */
  constructor(options = {}) {
    this.options = {
      checkExternal: options.checkExternal ?? false,
      externalTimeout: options.externalTimeout ?? 5000,
      strictInternal: options.strictInternal ?? true,
      checkClaudeImports: options.checkClaudeImports ?? true,
    };
  }

  /**
   * Validates every link of every parsed file.
   *
   * @param files - Parsed files, each exposing `filePath` and a `links` array
   *
   * @returns Promise resolving to `{ valid, filesChecked, linksChecked, brokenLinks, warnings }`
   */
  async validateFiles(files) {
    const brokenLinks = [];
    const warnings = [];
    let linksChecked = 0;

    for (const file of files) {
      const fileErrors = await this.validateFile(file);
      brokenLinks.push(...fileErrors);
      linksChecked += file.links.length;
    }

    return {
      valid: brokenLinks.length === 0,
      filesChecked: files.length,
      linksChecked,
      brokenLinks,
      warnings,
    };
  }

  /**
   * Validates all links of a single parsed file.
   *
   * @param file - Parsed file exposing `filePath` and a `links` array
   *
   * @returns Promise resolving to the array of broken links (empty when all are valid)
   */
  async validateFile(file) {
    const { brokenLinks } = await this.validateLinks(file.links, file.filePath);
    return brokenLinks;
  }

  /**
   * Dispatches a link to the validator matching its `type`.
   *
   * Any exception thrown by a type-specific validator is converted into an
   * `invalid-format` broken-link record instead of propagating.
   *
   * @param link - The link to validate
   * @param sourceFile - Path to the file containing the link
   *
   * @returns Promise resolving to a broken-link record, or null if the link is valid or skipped
   */
  async validateLink(link, sourceFile) {
    try {
      switch (link.type) {
        case 'internal':
          return await this.validateInternalLink(link, sourceFile);
        case 'claude-import':
          return this.options.checkClaudeImports
            ? await this.validateClaudeImportLink(link, sourceFile)
            : null;
        case 'external':
          return this.options.checkExternal
            ? await this.validateExternalLink(link, sourceFile)
            : null;
        case 'anchor':
          return await this.validateAnchorLink(link, sourceFile);
        case 'image':
          return await this.validateImageLink(link, sourceFile);
        case 'reference':
          // Reference links are validated if they resolve to an internal/external link
          return null;
        default:
          return null;
      }
    } catch (error) {
      return {
        sourceFile,
        link,
        reason: 'invalid-format',
        details: error instanceof Error ? error.message : String(error),
      };
    }
  }

  /**
   * Checks that the resolved target of an internal link exists on disk.
   * Missing targets are only reported when `strictInternal` is enabled.
   *
   * @param link - The internal link (uses `link.resolvedPath`)
   * @param sourceFile - Path to the file containing the link
   *
   * @returns Promise resolving to a broken-link record, or null if valid
   */
  async validateInternalLink(link, sourceFile) {
    if (!link.resolvedPath) {
      return {
        sourceFile,
        link,
        reason: 'invalid-format',
        details: 'Could not resolve internal link path',
      };
    }

    try {
      await access(link.resolvedPath, constants.F_OK);
      return null; // Link is valid
    } catch {
      if (this.options.strictInternal) {
        return {
          sourceFile,
          link,
          reason: 'file-not-found',
          details: `File does not exist: ${link.resolvedPath}`,
        };
      }
      return null; // Not strict, so ignore missing files
    }
  }

  /**
   * Checks that the resolved target of a Claude import exists on disk.
   *
   * @param link - The import link (uses `link.resolvedPath`)
   * @param sourceFile - Path to the file containing the link
   *
   * @returns Promise resolving to a broken-link record, or null if valid
   */
  async validateClaudeImportLink(link, sourceFile) {
    if (!link.resolvedPath) {
      return {
        sourceFile,
        link,
        reason: 'invalid-format',
        details: 'Could not resolve Claude import path',
      };
    }

    // Claude imports should point to existing files
    try {
      await access(link.resolvedPath, constants.F_OK);
      return null; // Import is valid
    } catch {
      return {
        sourceFile,
        link,
        reason: 'file-not-found',
        details: `Claude import file does not exist: ${link.resolvedPath}`,
      };
    }
  }

  /**
   * Issues a HEAD request for an external link and reports non-2xx responses,
   * network failures and timeouts as `external-error`.
   *
   * @param link - The external link (uses `link.href`)
   * @param sourceFile - Path to the file containing the link
   *
   * @returns Promise resolving to a broken-link record, or null if the response is OK
   */
  async validateExternalLink(link, sourceFile) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.options.externalTimeout);

    try {
      const response = await fetch(link.href, {
        method: 'HEAD',
        signal: controller.signal,
      });

      if (!response.ok) {
        return {
          sourceFile,
          link,
          reason: 'external-error',
          details: `HTTP ${response.status}: ${response.statusText}`,
        };
      }
      return null; // Link is valid
    } catch (error) {
      return {
        sourceFile,
        link,
        reason: 'external-error',
        details: error instanceof Error ? error.message : String(error),
      };
    } finally {
      // Always release the timer; otherwise a rejected fetch leaves it armed
      // and keeps the event loop alive for the whole timeout.
      clearTimeout(timeoutId);
    }
  }

  /**
   * Validates an image link: http(s) images go through external validation
   * (when enabled), everything else must exist on disk.
   *
   * @param link - The image link (uses `link.href` and `link.resolvedPath`)
   * @param sourceFile - Path to the file containing the link
   *
   * @returns Promise resolving to a broken-link record, or null if valid or skipped
   */
  async validateImageLink(link, sourceFile) {
    // Require a real http(s):// scheme so local files whose names merely
    // start with "http" are still checked on disk.
    if (HTTP_URL_PATTERN.test(link.href)) {
      return this.options.checkExternal
        ? await this.validateExternalLink(link, sourceFile)
        : null;
    }

    // For internal images, check if file exists
    if (!link.resolvedPath) {
      return {
        sourceFile,
        link,
        reason: 'invalid-format',
        details: 'Could not resolve image path',
      };
    }

    try {
      await access(link.resolvedPath, constants.F_OK);
      return null; // Image exists
    } catch {
      return {
        sourceFile,
        link,
        reason: 'file-not-found',
        details: `Image file does not exist: ${link.resolvedPath}`,
      };
    }
  }

  /**
   * Runs link validation plus circular-dependency detection.
   *
   * @param files - Parsed files to validate
   *
   * @returns Promise resolving to `{ valid, circularReferences, brokenLinks, warnings }`,
   *   where `circularReferences` is always an array of cycles
   */
  async validateLinkIntegrity(files) {
    const validationResult = await this.validateFiles(files);
    const cycleResult = await this.checkCircularReferences(files);

    // checkCircularReferences returns a plain `{ hasCircularReferences }` object
    // when it is not given parsed files (including an empty array); treat that
    // as "no cycles" rather than reading `.length` off a non-array.
    const circularReferences = Array.isArray(cycleResult) ? cycleResult : [];

    const warnings = [...validationResult.warnings];
    if (circularReferences.length > 0) {
      warnings.push(`Found ${circularReferences.length} circular reference(s)`);
    }

    return {
      valid: validationResult.valid && circularReferences.length === 0,
      circularReferences,
      brokenLinks: validationResult.brokenLinks,
      warnings,
    };
  }

  /**
   * Validates a specific array of links from a single file.
   *
   * @param links - Array of links to validate
   * @param sourceFile - Path to the source file containing the links
   *
   * @returns Promise resolving to validation result with broken links
   */
  async validateLinks(links, sourceFile) {
    const brokenLinks = [];
    for (const link of links) {
      const broken = await this.validateLink(link, sourceFile);
      if (broken) {
        brokenLinks.push(broken);
      }
    }
    return { brokenLinks };
  }

  /**
   * Detects dependency cycles between parsed files via depth-first search.
   *
   * @param files - Parsed files (objects with `filePath` and optional `dependencies`),
   *   or any other array
   *
   * @returns Promise resolving to an array of cycles (each a list of file paths that
   *   starts and ends with the same path) when the first element is a parsed file;
   *   otherwise `{ hasCircularReferences: false }`
   */
  async checkCircularReferences(files) {
    const isParsedFile = (f) => typeof f === 'object' && f !== null && 'filePath' in f;

    if (files.length === 0 || !isParsedFile(files[0])) {
      // string[] case - return basic implementation
      return {
        hasCircularReferences: false,
      };
    }

    const parsedFiles = files.filter(isParsedFile);

    // Index by path once so each DFS step is O(1); the first entry wins for
    // duplicate paths, matching a front-to-back linear search.
    const filesByPath = new Map();
    for (const file of parsedFiles) {
      if (!filesByPath.has(file.filePath)) {
        filesByPath.set(file.filePath, file);
      }
    }

    const visited = new Set();
    const recursionStack = new Set();
    const cycles = [];

    const detectCycle = (filePath, path) => {
      if (recursionStack.has(filePath)) {
        // Found a cycle - extract the cycle from the path
        const cycleStart = path.indexOf(filePath);
        cycles.push([...path.slice(cycleStart), filePath]);
        return;
      }
      if (visited.has(filePath)) {
        return;
      }

      visited.add(filePath);
      recursionStack.add(filePath);

      const dependencies = filesByPath.get(filePath)?.dependencies;
      if (dependencies) {
        for (const dependency of dependencies) {
          detectCycle(dependency, [...path, filePath]);
        }
      }

      recursionStack.delete(filePath);
    };

    for (const file of parsedFiles) {
      if (!visited.has(file.filePath)) {
        detectCycle(file.filePath, []);
      }
    }

    return cycles;
  }

  /**
   * Validates anchor links by checking if the target heading exists in the file.
   *
   * @param link - The anchor link to validate
   * @param sourceFile - Path to the file containing the link
   *
   * @returns Promise resolving to BrokenLink if invalid, null if valid
   */
  async validateAnchorLink(link, sourceFile) {
    try {
      // Extract the anchor from the href (remove the #)
      const anchor = link.href.substring(1);
      if (!anchor) {
        return {
          sourceFile,
          link,
          reason: 'invalid-format',
          details: 'Empty anchor reference',
        };
      }

      // Read the source file to check for the heading
      const content = await readFile(sourceFile, 'utf-8');
      const normalizedAnchor = toAnchorSlug(anchor);

      // Look for headings in the file
      const headingRegex = /^#+\s+(.+)$/gm;
      const headings = [];
      for (const match of content.matchAll(headingRegex)) {
        // Trim first: trailing whitespace on a heading line would otherwise
        // turn into a trailing hyphen and never match the anchor.
        const normalizedHeading = toAnchorSlug(match[1].trim());
        headings.push(normalizedHeading);
        if (normalizedHeading === normalizedAnchor) {
          return null; // Anchor is valid
        }
      }

      return {
        sourceFile,
        link,
        reason: 'file-not-found',
        details: `Anchor "${anchor}" not found. Available headings: ${headings.join(', ')}`,
      };
    } catch (error) {
      return {
        sourceFile,
        link,
        reason: 'invalid-format',
        details: `Error validating anchor: ${error instanceof Error ? error.message : String(error)}`,
      };
    }
  }
}
//# sourceMappingURL=link-validator.js.map