UNPKG

doctool

Version:

AI-powered documentation validation and management system

418 lines 16.7 kB
import * as fs from 'fs';
import * as path from 'path';

/**
 * Validates links and URLs in documentation files.
 *
 * Links are extracted line by line from markdown text, classified by
 * `determineLinkType` ('http', 'https', 'ftp', 'mailto', 'anchor' or
 * 'internal') and then checked with a type-specific strategy:
 * HTTP(S) links are requested over the network, internal links are looked up
 * on disk, anchor links are matched against the headings of the target file,
 * and mailto links are checked against a basic address pattern.
 */
export class LinkValidator {
    basePath;
    timeoutMs;
    userAgent;

    /**
     * @param {string} basePath - Project root; used as a fallback root when
     *   resolving links that start with '/'.
     * @param {number} timeoutMs - Per-request timeout for HTTP(S) checks, in milliseconds.
     */
    constructor(basePath = process.cwd(), timeoutMs = 5000) {
        this.basePath = basePath;
        this.timeoutMs = timeoutMs;
        this.userAgent = 'DocTool Link Validator';
    }

    /**
     * Validates all links in a documentation file.
     *
     * Links are validated one after another (not in parallel) so issues are
     * returned in document order and remote hosts are not hit concurrently.
     *
     * @param {string} docFilePath - Path of the markdown file to check.
     * @returns {Promise<object[]>} The issues found for every link; if the
     *   file cannot be processed, a single 'invalid_path' issue describing the error.
     */
    async validateDocumentationFile(docFilePath) {
        try {
            const content = fs.readFileSync(docFilePath, 'utf8');
            const links = this.extractLinks(content, docFilePath);
            const issues = [];
            for (const link of links) {
                const result = await this.validateLink(link, docFilePath);
                issues.push(...result.issues);
            }
            return issues;
        }
        catch (error) {
            const location = { file: docFilePath, line: 1, context: 'File reading error' };
            return [{
                    type: 'invalid_path',
                    severity: 'error',
                    message: `Could not read documentation file: ${error}`,
                    location,
                    file_reference: {
                        path: docFilePath,
                        type: 'file',
                        mentioned_in: { ...location },
                        exists: false
                    }
                }];
        }
    }

    /**
     * Extracts all links from markdown content.
     *
     * Recognised forms: inline links `[text](url "title")`, autolinks `<url>`
     * and bare http(s) URLs preceded by whitespace or the start of the line.
     * Reference-style links (`[text][ref]`) are not extracted because their
     * definitions are not resolved here.
     *
     * @param {string} content - Markdown text.
     * @param {string} sourceFile - File name recorded in each link's location.
     * @returns {object[]} Link records, de-duplicated per (url, line).
     */
    extractLinks(content, sourceFile) {
        const links = [];
        content.split(/\r?\n/).forEach((line, lineIndex) => {
            const context = line.trim();
            const locate = (column) => ({ file: sourceFile, line: lineIndex + 1, column, context });

            // 1. Markdown links: [text](url), [text](url "title") or [text](<url>)
            for (const match of line.matchAll(/\[([^\]]*)\]\(([^)]+)\)/g)) {
                const url = this.parseLinkDestination(match[2]);
                const type = this.determineLinkType(url);
                links.push({
                    url,
                    type,
                    mentioned_in: locate(match.index),
                    status: 'unknown',
                    target_file: type === 'internal' ? url : undefined,
                    anchor: this.extractAnchor(url)
                });
            }

            // 2. Autolinks: <url>
            for (const match of line.matchAll(/<(https?:\/\/[^>]+)>/g)) {
                const url = match[1];
                links.push({
                    url,
                    type: this.determineLinkType(url),
                    mentioned_in: locate(match.index),
                    status: 'unknown'
                });
            }

            // 3. Plain URLs (basic detection)
            for (const match of line.matchAll(/(?:^|\s)(https?:\/\/[^\s]+)/g)) {
                // Sentence punctuation right after a URL is not part of it.
                const url = this.stripTrailingPunctuation(match[1]);
                // match[0] may start with one whitespace character; point the
                // column at the URL itself.
                const column = match.index + (match[0].length - match[1].length);
                links.push({
                    url,
                    type: this.determineLinkType(url),
                    mentioned_in: locate(column),
                    status: 'unknown'
                });
            }
        });
        return this.deduplicateLinks(links);
    }

    /**
     * Validates a single link and records the outcome in `link.status`.
     *
     * `link.status` becomes 'valid' on success, 'unreachable' when an HTTP(S)
     * request could not be completed (network error or timeout) and 'broken'
     * for every other failure. FTP links are accepted without any check.
     *
     * @param {object} link - Link record produced by `extractLinks`.
     * @param {string} docFilePath - File the link was found in; relative
     *   targets are resolved against its directory.
     * @returns {Promise<{link: object, valid: boolean, issues: object[]}>}
     */
    async validateLink(link, docFilePath) {
        const issues = [];
        let valid = true;
        let failureStatus = 'broken';
        const missingReference = (targetPath) => ({
            path: targetPath,
            type: 'file',
            mentioned_in: link.mentioned_in,
            exists: false
        });
        try {
            switch (link.type) {
                case 'http':
                case 'https': {
                    const httpResult = await this.validateHttpUrl(link);
                    if (!httpResult.valid) {
                        valid = false;
                        // validateHttpUrl marks failed requests as 'unreachable';
                        // keep that distinction instead of flattening it to 'broken'.
                        if (link.status === 'unreachable') {
                            failureStatus = 'unreachable';
                        }
                        issues.push({
                            type: 'missing_file', // Reusing existing type
                            severity: httpResult.permanent ? 'error' : 'warning',
                            message: `${link.type.toUpperCase()} link is broken: ${link.url} (${httpResult.error})`,
                            location: link.mentioned_in,
                            suggestion: this.suggestUrlFix(link.url, httpResult.response_code),
                            file_reference: missingReference(link.url)
                        });
                    }
                    break;
                }
                case 'internal': {
                    const internalResult = this.validateInternalLink(link, docFilePath);
                    if (!internalResult.valid) {
                        valid = false;
                        issues.push({
                            type: 'missing_file',
                            severity: 'error',
                            message: `Internal link target not found: ${link.url}`,
                            location: link.mentioned_in,
                            suggestion: internalResult.suggestion || 'Check if the target file exists and the path is correct',
                            file_reference: missingReference(link.target_file || link.url)
                        });
                    }
                    break;
                }
                case 'anchor': {
                    const anchorResult = this.validateAnchorLink(link, docFilePath);
                    if (!anchorResult.valid) {
                        valid = false;
                        issues.push({
                            type: 'missing_file',
                            severity: 'warning',
                            message: `Anchor link target not found: ${link.url}`,
                            location: link.mentioned_in,
                            suggestion: anchorResult.suggestion || 'Check if the heading exists in the target file',
                            file_reference: missingReference(link.url)
                        });
                    }
                    break;
                }
                case 'mailto': {
                    const emailResult = this.validateEmailAddress(link);
                    if (!emailResult.valid) {
                        valid = false;
                        issues.push({
                            type: 'invalid_path',
                            severity: 'warning',
                            message: `Invalid email address: ${link.url}`,
                            location: link.mentioned_in,
                            suggestion: 'Check email address format',
                            file_reference: missingReference(link.url)
                        });
                    }
                    break;
                }
                case 'ftp':
                    // FTP links are accepted as-is; no connection is attempted.
                    break;
            }
        }
        catch (error) {
            valid = false;
            failureStatus = 'broken';
            issues.push({
                type: 'invalid_path',
                severity: 'error',
                message: `Error validating link ${link.url}: ${error}`,
                location: link.mentioned_in,
                file_reference: missingReference(link.url)
            });
        }
        link.status = valid ? 'valid' : failureStatus;
        return { link, valid, issues };
    }

    /**
     * Validates HTTP/HTTPS URLs by requesting them.
     *
     * A HEAD request is tried first to avoid downloading content. When the
     * server answers 405 or 501 (method not supported) the check is repeated
     * with GET. Sets `link.response_code` and `link.status`
     * ('valid', 'broken' or 'unreachable').
     *
     * @param {object} link - Link record; `link.url` is requested.
     * @returns {Promise<{valid: boolean, error?: string, response_code?: number, permanent?: boolean}>}
     *   `permanent` is true for 4xx responses other than 408 and 429.
     */
    async validateHttpUrl(link) {
        try {
            let response = await this.fetchWithTimeout(link.url, 'HEAD');
            if (response.status === 405 || response.status === 501) {
                response = await this.fetchWithTimeout(link.url, 'GET');
            }
            link.response_code = response.status;
            if (response.ok) {
                link.status = 'valid';
                return { valid: true };
            }
            // 408 (Request Timeout) and 429 (Too Many Requests) describe a
            // temporary condition, so they are reported as warnings.
            const transient = response.status === 408 || response.status === 429;
            const permanent = response.status >= 400 && response.status < 500 && !transient;
            link.status = 'broken';
            return {
                valid: false,
                error: `HTTP ${response.status} ${response.statusText}`,
                response_code: response.status,
                permanent
            };
        }
        catch (error) {
            link.status = 'unreachable';
            if (error?.name === 'AbortError') {
                return { valid: false, error: 'Request timeout', permanent: false };
            }
            if (error instanceof Error) {
                return { valid: false, error: error.message, permanent: false };
            }
            return { valid: false, error: 'Unknown error', permanent: false };
        }
    }

    /**
     * Validates internal file links.
     *
     * The fragment (if any) is ignored. The target is looked up relative to
     * the directory of `docFilePath`; percent-decoded and, for links starting
     * with '/', `basePath`-rooted variants are tried as fallbacks.
     *
     * @param {object} link - Link record; `link.url` is the target path.
     * @param {string} docFilePath - File the link was found in.
     * @returns {{valid: boolean, suggestion?: string}}
     */
    validateInternalLink(link, docFilePath) {
        const filePart = link.url.split('#')[0]; // Remove anchor
        const found = this.resolveTargetCandidates(filePart, docFilePath)
            .some((candidate) => fs.existsSync(candidate));
        if (found) {
            return { valid: true };
        }
        // Try to suggest similar files
        const suggestion = this.suggestSimilarFiles(link.url, path.dirname(docFilePath));
        return { valid: false, suggestion };
    }

    /**
     * Validates anchor links (`#heading` or `file.md#heading`).
     *
     * The fragment is percent-decoded, normalised with `normalizeAnchor` and
     * compared against the slugs of all headings in the target file
     * (including `-1`, `-2`, ... suffixes for repeated headings).
     *
     * @param {object} link - Link record; `link.url` contains the fragment.
     * @param {string} docFilePath - File the link was found in; also the
     *   target when the URL has no file part.
     * @returns {{valid: boolean, suggestion?: string}}
     */
    validateAnchorLink(link, docFilePath) {
        const [filePart, anchor] = link.url.split('#');
        if (!anchor) {
            return { valid: false, suggestion: 'Anchor link missing fragment identifier' };
        }
        // Same-file anchors read the document itself; otherwise try every
        // place the referenced file could live.
        const targets = filePart
            ? this.resolveTargetCandidates(filePart, docFilePath)
            : [docFilePath];
        let content;
        for (const target of targets) {
            try {
                content = fs.readFileSync(target, 'utf8');
                break;
            }
            catch {
                // Not readable at this location; try the next candidate.
            }
        }
        if (content === undefined) {
            return { valid: false, suggestion: 'Could not read target file' };
        }
        const headings = this.extractHeadings(content);
        const normalizedAnchor = this.normalizeAnchor(this.safeDecode(anchor));
        if (this.collectAnchorSlugs(headings).has(normalizedAnchor)) {
            return { valid: true };
        }
        const loweredAnchor = anchor.toLowerCase();
        const similarHeadings = headings
            .filter((heading) => heading.toLowerCase().includes(loweredAnchor) ||
            (normalizedAnchor !== '' && this.normalizeAnchor(heading).includes(normalizedAnchor)))
            .slice(0, 3);
        const suggestion = similarHeadings.length > 0
            ? `Similar headings found: ${similarHeadings.join(', ')}`
            : 'Check if the heading exists in the target file';
        return { valid: false, suggestion };
    }

    /**
     * Validates email addresses of a mailto link.
     *
     * The `mailto:` prefix and any `?query` are removed first; several
     * comma-separated recipients are each checked. When the address part is
     * empty the `to` query parameter is used instead.
     *
     * @param {object} link - Link record; `link.url` is the mailto URL.
     * @returns {{valid: boolean}}
     */
    validateEmailAddress(link) {
        // Basic email regex - not perfect but good enough for most cases
        const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
        const splitRecipients = (value) => value
            .split(',')
            .map((entry) => entry.trim())
            .filter((entry) => entry !== '');
        const withoutScheme = link.url.replace(/^mailto:/i, '');
        const queryStart = withoutScheme.indexOf('?');
        const addressPart = queryStart === -1 ? withoutScheme : withoutScheme.slice(0, queryStart);
        const query = queryStart === -1 ? '' : withoutScheme.slice(queryStart + 1);
        let recipients = splitRecipients(this.safeDecode(addressPart));
        if (recipients.length === 0) {
            recipients = splitRecipients(new URLSearchParams(query).get('to') ?? '');
        }
        return {
            valid: recipients.length > 0 && recipients.every((recipient) => emailRegex.test(recipient))
        };
    }

    // Helper methods

    /**
     * Classifies a URL by its scheme (matched case-insensitively); URLs
     * without a known scheme are 'anchor' when they contain '#', else 'internal'.
     *
     * @param {string} url
     * @returns {'http'|'https'|'ftp'|'mailto'|'anchor'|'internal'}
     */
    determineLinkType(url) {
        const lowered = url.toLowerCase();
        if (lowered.startsWith('http://'))
            return 'http';
        if (lowered.startsWith('https://'))
            return 'https';
        if (lowered.startsWith('ftp://'))
            return 'ftp';
        if (lowered.startsWith('mailto:'))
            return 'mailto';
        if (url.includes('#'))
            return 'anchor';
        return 'internal';
    }

    /**
     * Returns the text between the first and second '#' of a URL, or
     * `undefined` when the URL has no '#'.
     *
     * @param {string} url
     * @returns {string|undefined}
     */
    extractAnchor(url) {
        const parts = url.split('#');
        return parts.length > 1 ? parts[1] : undefined;
    }

    /**
     * Drops links that repeat the same URL on the same line, keeping the first.
     *
     * @param {object[]} links
     * @returns {object[]}
     */
    deduplicateLinks(links) {
        const seen = new Set();
        return links.filter((link) => {
            const key = `${link.url}:${link.mentioned_in.line}`;
            if (seen.has(key))
                return false;
            seen.add(key);
            return true;
        });
    }

    /**
     * Collects the text of ATX headings (`#` to `######` followed by a space).
     *
     * Lines inside fenced code blocks are skipped so that `# comment` lines in
     * code samples are not mistaken for headings.
     *
     * @param {string} content - Markdown text.
     * @returns {string[]} Heading texts in document order.
     */
    extractHeadings(content) {
        const headings = [];
        let openFence = null; // { char, length } while inside a fenced block
        for (const line of content.split('\n')) {
            const trimmed = line.trim();
            const fence = /^(`{3,}|~{3,})(.*)$/.exec(trimmed);
            if (fence) {
                const marker = fence[1];
                const rest = fence[2];
                if (openFence === null) {
                    // A backtick fence cannot carry backticks in its info
                    // string; such a line is inline code, not a fence.
                    if (marker[0] !== '`' || !rest.includes('`')) {
                        openFence = { char: marker[0], length: marker.length };
                        continue;
                    }
                }
                else if (marker[0] === openFence.char &&
                    marker.length >= openFence.length &&
                    rest.trim() === '') {
                    openFence = null;
                    continue;
                }
            }
            if (openFence !== null) {
                continue;
            }
            const heading = /^#{1,6}(?:[ \t]+(.*))?$/.exec(trimmed);
            if (heading) {
                // Drop an optional closing run of '#' ("## Title ##").
                const text = (heading[1] ?? '').replace(/(?:^|[ \t]+)#+$/, '').trim();
                headings.push(text);
            }
        }
        return headings;
    }

    /**
     * GitHub-style anchor normalization: lower-case, drop punctuation, turn
     * whitespace into hyphens. Letters and digits of any script are kept.
     * Runs of hyphens are collapsed and edge hyphens removed so that minor
     * differences in slug style still compare equal.
     *
     * @param {string} anchor - Heading text or URL fragment.
     * @returns {string}
     */
    normalizeAnchor(anchor) {
        return anchor
            .toLowerCase()
            .replace(/[^\p{L}\p{M}\p{N}_\s-]/gu, '') // Remove special characters
            .replace(/\s+/g, '-') // Replace spaces with hyphens
            .replace(/-+/g, '-') // Collapse multiple hyphens
            .replace(/^-|-$/g, ''); // Remove leading/trailing hyphens
    }

    /**
     * Builds a hint listing up to three files in the target directory whose
     * base name contains, or is contained in, the missing file's base name.
     *
     * @param {string} missingPath - Link target that was not found.
     * @param {string} basePath - Directory the link is relative to.
     * @returns {string} Suggestion text; a generic hint when nothing similar exists.
     */
    suggestSimilarFiles(missingPath, basePath) {
        const fallback = 'Check if the file path is correct and the file exists';
        const wanted = path.basename(missingPath, path.extname(missingPath)).toLowerCase();
        if (wanted === '') {
            // An empty name would "match" every file in the directory.
            return fallback;
        }
        try {
            const targetDir = path.dirname(path.resolve(basePath, missingPath));
            const similar = fs.readdirSync(targetDir).filter((file) => {
                const candidate = path.basename(file, path.extname(file)).toLowerCase();
                return candidate.includes(wanted) || wanted.includes(candidate);
            });
            if (similar.length > 0) {
                return `Similar files found: ${similar.slice(0, 3).join(', ')}`;
            }
        }
        catch (error) {
            // Directory doesn't exist or is unreadable: fall back to the generic hint.
        }
        return fallback;
    }

    /**
     * Maps an HTTP status code to a human-readable suggestion.
     *
     * @param {string} url - The failing URL (currently not used in the text).
     * @param {number} [responseCode] - HTTP status, if a response was received.
     * @returns {string}
     */
    suggestUrlFix(url, responseCode) {
        if (responseCode === 404) {
            return 'URL not found - check if the page has moved or been deleted';
        }
        if (responseCode === 401 || responseCode === 403) {
            return 'Access forbidden - the URL might require authentication';
        }
        if (responseCode && responseCode >= 500) {
            return 'Server error - try again later or contact the website administrator';
        }
        if (responseCode && responseCode >= 300 && responseCode < 400) {
            return 'URL redirects - consider updating to the final destination';
        }
        return 'Check if the URL is correct and the website is accessible';
    }

    /**
     * Extracts the destination from the parenthesised part of an inline
     * markdown link, dropping an optional title and `<...>` wrapping.
     *
     * @param {string} raw - Text between '(' and ')'.
     * @returns {string}
     */
    parseLinkDestination(raw) {
        const trimmed = raw.trim();
        if (trimmed.startsWith('<')) {
            const close = trimmed.indexOf('>');
            if (close > 0) {
                return trimmed.slice(1, close);
            }
        }
        return trimmed.split(/\s+/)[0];
    }

    /**
     * Removes sentence punctuation that directly follows a bare URL. A closing
     * parenthesis is only removed when it has no matching '(' inside the URL.
     *
     * @param {string} url
     * @returns {string}
     */
    stripTrailingPunctuation(url) {
        let end = url.length;
        while (end > 0) {
            const last = url[end - 1];
            if ('.,;:!?\'"'.includes(last)) {
                end -= 1;
                continue;
            }
            if (last === ')') {
                const head = url.slice(0, end);
                const opens = head.split('(').length - 1;
                const closes = head.split(')').length - 1;
                if (closes > opens) {
                    end -= 1;
                    continue;
                }
            }
            break;
        }
        return url.slice(0, end);
    }

    /**
     * Percent-decodes a value, returning it unchanged when it contains a
     * malformed escape sequence.
     *
     * @param {string} value
     * @returns {string}
     */
    safeDecode(value) {
        try {
            return decodeURIComponent(value);
        }
        catch {
            return value;
        }
    }

    /**
     * Lists the filesystem paths a link's file part may refer to, most
     * literal first: relative to the document's directory, then (for links
     * starting with '/') relative to `basePath`; each also percent-decoded.
     *
     * @param {string} filePart - Link target without fragment.
     * @param {string} docFilePath - File the link was found in.
     * @returns {string[]}
     */
    resolveTargetCandidates(filePart, docFilePath) {
        const docDir = path.dirname(docFilePath);
        const variants = [filePart];
        const decoded = this.safeDecode(filePart);
        if (decoded !== filePart) {
            variants.push(decoded);
        }
        const candidates = [];
        for (const variant of variants) {
            candidates.push(path.resolve(docDir, variant));
            if (variant.startsWith('/')) {
                candidates.push(path.join(this.basePath, variant));
            }
        }
        return candidates;
    }

    /**
     * Computes the set of anchor slugs for a list of headings. The first
     * occurrence of a slug is used as-is; repeats get `-1`, `-2`, ... appended.
     *
     * @param {string[]} headings
     * @returns {Set<string>}
     */
    collectAnchorSlugs(headings) {
        const occurrences = new Map();
        const slugs = new Set();
        for (const heading of headings) {
            const slug = this.normalizeAnchor(heading);
            const seen = occurrences.get(slug) ?? 0;
            slugs.add(seen === 0 ? slug : `${slug}-${seen}`);
            occurrences.set(slug, seen + 1);
        }
        return slugs;
    }

    /**
     * Issues one request that is aborted after `timeoutMs`. The timer is
     * always cleared, whether the request succeeds or fails.
     *
     * @param {string} url
     * @param {'HEAD'|'GET'} method
     * @returns {Promise<Response>}
     */
    async fetchWithTimeout(url, method) {
        const controller = new AbortController();
        const timeoutId = setTimeout(() => controller.abort(), this.timeoutMs);
        try {
            const response = await fetch(url, {
                method,
                signal: controller.signal,
                headers: {
                    'User-Agent': this.userAgent
                }
            });
            if (method !== 'HEAD') {
                // Only the status is needed; discard the body instead of downloading it.
                try {
                    await response.body?.cancel();
                }
                catch {
                    // Ignoring: the status has already been received.
                }
            }
            return response;
        }
        finally {
            clearTimeout(timeoutId);
        }
    }
}
//# sourceMappingURL=linkValidator.js.map