markmv
Version:
TypeScript CLI for markdown file operations with intelligent link refactoring
235 lines • 8.89 kB
JavaScript
import { readFile, readdir } from 'node:fs/promises';
import { homedir } from 'node:os';
import { dirname, extname, isAbsolute, join, resolve } from 'node:path';
import remarkParse from 'remark-parse';
import { unified } from 'unified';
import { visit } from 'unist-util-visit';
/**
 * Parser for extracting and analyzing markdown links and references.
 *
 * This class uses the unified/remark ecosystem to parse markdown files and extract comprehensive
 * link information including inline links, images, reference-style links, link definitions and
 * Claude-style `@path` imports found in plain text.
 *
 * @category Core
 *
 * @example
 * Basic usage
 * ```typescript
 * const parser = new LinkParser();
 * const parsed = await parser.parseFile('docs/readme.md');
 *
 * console.log(`Found ${parsed.links.length} links`);
 * parsed.links.forEach(link => {
 *   console.log(`${link.type}: ${link.href} (line ${link.line})`);
 * });
 * ```
 *
 * @example
 * Link validation
 * ```typescript
 * import { access } from 'node:fs/promises';
 *
 * const parser = new LinkParser();
 * const parsed = await parser.parseFile('guide.md');
 *
 * // `resolvedPath` is filled in for internal links while parsing.
 * const localLinks = parsed.links.filter(link =>
 *   link.type === 'internal' && link.resolvedPath
 * );
 *
 * for (const link of localLinks) {
 *   try {
 *     await access(link.resolvedPath);
 *   } catch {
 *     console.warn(`Broken link: ${link.href} at line ${link.line}`);
 *   }
 * }
 * ```
 */
export class LinkParser {
    /** Unified pipeline used only for its `parse` step (markdown text -> mdast tree). */
    processor = unified().use(remarkParse);

    /**
     * Parse a markdown file and extract all links, references, and metadata.
     *
     * @example
     * ```typescript
     * const parser = new LinkParser();
     * const result = await parser.parseFile('docs/api.md');
     *
     * console.log(`File: ${result.filePath}`);
     * console.log(`Links: ${result.links.length}`);
     * console.log(`References: ${result.references.length}`);
     * ```
     *
     * @param filePath - Path to the markdown file to parse (resolved against the current
     *   working directory when relative)
     *
     * @returns Promise resolving to `{ filePath, links, references, dependencies, dependents }`.
     *   `links` lists Claude imports first, then links, images, link references and image
     *   references. `dependents` is always returned empty here.
     *
     * @throws Rejects with the `readFile` error when the file cannot be read.
     */
    async parseFile(filePath) {
        const absolutePath = resolve(filePath);
        const baseDir = dirname(absolutePath);
        const content = await readFile(absolutePath, 'utf-8');
        const tree = this.processor.parse(content);
        const links = [];
        const references = [];
        // identifier -> url, first definition wins (same result as searching `references` in order).
        const definitionUrls = new Map();

        // Extract link references/definitions
        visit(tree, 'definition', (node) => {
            if (!node.position) {
                return;
            }
            references.push({
                id: node.identifier,
                url: node.url,
                title: node.title || undefined,
                line: node.position.start.line,
            });
            if (!definitionUrls.has(node.identifier)) {
                definitionUrls.set(node.identifier, node.url);
            }
        });

        // Extract Claude import links ("@path") from text nodes
        visit(tree, 'text', (node) => {
            if (!node.position) {
                return;
            }
            for (const match of node.value.matchAll(/@([^\s\n]+)/g)) {
                const importPath = match[1];
                // A text node may span several lines, so locate the match itself
                // instead of reusing the node's start line.
                const { line, column } = this.locateInText(node.position.start, node.value, match.index);
                links.push({
                    type: 'claude-import',
                    href: importPath,
                    text: match[0], // Full "@path" text
                    referenceId: undefined,
                    line,
                    column,
                    absolute: importPath.startsWith('/') || importPath.startsWith('~'),
                    resolvedPath: this.resolveClaudeImportPath(importPath, baseDir),
                });
            }
        });

        // Extract links and images, one node type at a time so the output order stays
        // links, images, link references, image references.
        const collectLink = (node) => {
            const link = this.buildLink(node, definitionUrls, baseDir);
            if (link) {
                links.push(link);
            }
        };
        for (const nodeType of ['link', 'image', 'linkReference', 'imageReference']) {
            visit(tree, nodeType, collectLink);
        }

        return {
            filePath: absolutePath,
            links,
            references,
            dependencies: this.extractDependencies(links),
            dependents: [], // Will be populated by DependencyGraph
        };
    }

    /**
     * Build the link record for a `link`, `image`, `linkReference` or `imageReference` node.
     *
     * @param node - mdast node of one of the four types above
     * @param definitionUrls - Map from definition identifier to its URL
     * @param baseDir - Directory of the file being parsed, used to resolve relative paths
     *
     * @returns The link record, or `undefined` when the node carries no position
     */
    buildLink(node, definitionUrls, baseDir) {
        if (!node.position) {
            return undefined;
        }
        const isImage = node.type === 'image' || node.type === 'imageReference';
        let href;
        let linkType;
        let referenceId;
        if (node.type === 'link' || node.type === 'image') {
            href = node.url || '';
            linkType = isImage ? 'image' : this.determineLinkType(href);
        }
        else {
            // Reference-style links take their target from the matching definition.
            referenceId = node.identifier;
            href = definitionUrls.get(referenceId) || '';
            linkType = isImage ? 'image' : 'reference';
        }
        const link = {
            type: linkType,
            href,
            text: this.extractNodeText(node),
            referenceId: referenceId || undefined,
            line: node.position.start.line,
            column: node.position.start.column,
            absolute: isAbsolute(href),
        };
        // Resolve internal links and local image paths. Images pointing at a URL or an
        // anchor are not files on disk, so they get no resolvedPath.
        const pointsToFile = linkType === 'internal' ||
            (linkType === 'image' && this.determineLinkType(href) === 'internal');
        if (pointsToFile) {
            link.resolvedPath = this.resolveInternalPath(href, baseDir);
        }
        return link;
    }

    /**
     * Display text of a link-like node: the `alt` text for images, otherwise the
     * concatenated values of the node's direct `text` children (nested formatting
     * nodes are not descended into).
     *
     * @param node - mdast link/image/linkReference/imageReference node
     *
     * @returns The text, or `undefined` for an image without alt text or a node without children
     */
    extractNodeText(node) {
        if (node.type === 'image' || node.type === 'imageReference') {
            return node.alt || undefined;
        }
        if (!node.children) {
            return undefined;
        }
        return node.children
            .filter((child) => child.type === 'text')
            .map((child) => child.value)
            .join('');
    }

    /**
     * Translate a character index inside a text node's value into a line/column pair.
     *
     * @param start - Position of the node's first character (`{ line, column }`)
     * @param value - The node's text value
     * @param index - Zero-based index into `value`
     *
     * @returns `{ line, column }` of the indexed character. On the node's first line the
     *   column is offset from `start.column`; on later lines it is 1-based within the line
     *   as it appears in `value`.
     */
    locateInText(start, value, index) {
        let lineOffset = 0;
        let lineStart = 0;
        for (let i = value.indexOf('\n'); i !== -1 && i < index; i = value.indexOf('\n', i + 1)) {
            lineOffset += 1;
            lineStart = i + 1;
        }
        if (lineOffset === 0) {
            return { line: start.line, column: start.column + index };
        }
        // NOTE(review): assumes `value` keeps the source text of continuation lines verbatim;
        // if the parser strips their indentation the column is short by that indent - confirm.
        return { line: start.line + lineOffset, column: index - lineStart + 1 };
    }

    /**
     * Classify an href.
     *
     * @param href - Link target as written in the document
     *
     * @returns `'external'` for http/https/ftp/mailto targets, `'anchor'` for targets
     *   starting with `#`, and `'internal'` for everything else (including an empty href)
     */
    determineLinkType(href) {
        if (!href) {
            return 'internal';
        }
        // External links (http/https/ftp/mailto)
        if (/^(https?|ftp|mailto):/i.test(href)) {
            return 'external';
        }
        // Anchor links (starting with #)
        if (href.startsWith('#')) {
            return 'anchor';
        }
        // Internal links (relative or absolute file paths)
        return 'internal';
    }

    /**
     * Resolve a file link to an absolute path, ignoring any `#fragment`.
     *
     * @param href - Link target
     * @param baseDir - Directory that relative targets are resolved against
     *
     * @returns The path part unchanged when it is already absolute, otherwise the
     *   absolute path obtained by joining it onto `baseDir`
     */
    resolveInternalPath(href, baseDir) {
        // Remove anchor fragments
        const pathPart = href.split('#')[0];
        if (isAbsolute(pathPart)) {
            return pathPart;
        }
        return resolve(join(baseDir, pathPart));
    }

    /**
     * Resolve the path of a Claude `@path` import.
     *
     * @param importPath - Path as written after the `@`
     * @param baseDir - Directory that relative imports are resolved against
     *
     * @returns Absolute path: `~` and `~/...` expand to the current user's home directory,
     *   absolute paths are returned unchanged, anything else is joined onto `baseDir`
     */
    resolveClaudeImportPath(importPath, baseDir) {
        // Handle home directory paths (~). `homedir` is imported at module level because
        // `require` is not available inside an ES module.
        if (importPath === '~' || importPath.startsWith('~/')) {
            return resolve(join(homedir(), importPath.slice(2)));
        }
        // Handle absolute paths
        if (isAbsolute(importPath)) {
            return importPath;
        }
        // Handle relative paths
        return resolve(join(baseDir, importPath));
    }

    /**
     * Collect the distinct resolved paths of links that point at files.
     *
     * @param links - Link records as produced by `parseFile`
     *
     * @returns Unique `resolvedPath` values of internal, image and claude-import links,
     *   in order of first appearance
     */
    extractDependencies(links) {
        const fileLinkTypes = new Set(['internal', 'claude-import', 'image']);
        const uniquePaths = new Set();
        for (const link of links) {
            if (fileLinkTypes.has(link.type) && link.resolvedPath) {
                uniquePaths.add(link.resolvedPath);
            }
        }
        return [...uniquePaths];
    }

    /**
     * Parse every markdown file found under a directory (recursively).
     *
     * @param dirPath - Directory to scan
     * @param extensions - File extensions (with leading dot) to treat as markdown
     *
     * @returns Promise resolving to the parse results of the files that parsed successfully.
     *   Files whose parse rejects are left out of the result rather than failing the batch.
     *
     * @throws Rejects when the directory tree itself cannot be read.
     */
    async parseDirectory(dirPath, extensions = ['.md', '.markdown', '.mdx']) {
        const files = await this.findMarkdownFiles(dirPath, extensions);
        const results = await Promise.allSettled(files.map((file) => this.parseFile(file)));
        return results
            .filter((result) => result.status === 'fulfilled')
            .map((result) => result.value);
    }

    /**
     * Recursively list files under `dirPath` whose extension is in `extensions`.
     *
     * @param dirPath - Directory to scan
     * @param extensions - Extensions to accept; compared exactly (case-sensitive) against
     *   `path.extname` of each file name
     *
     * @returns Promise resolving to the absolute paths of the matching files
     */
    async findMarkdownFiles(dirPath, extensions) {
        const files = [];
        const processDirectory = async (currentDir) => {
            const entries = await readdir(currentDir, { withFileTypes: true });
            for (const entry of entries) {
                const fullPath = join(currentDir, entry.name);
                if (entry.isDirectory()) {
                    await processDirectory(fullPath);
                }
                else if (entry.isFile() && extensions.includes(extname(entry.name))) {
                    files.push(fullPath);
                }
            }
        };
        await processDirectory(resolve(dirPath));
        return files;
    }
}
//# sourceMappingURL=link-parser.js.map