@kayvan/markdown-tree-parser
A powerful JavaScript library and CLI tool for parsing and manipulating markdown files as tree structures using the remark/unified ecosystem
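As a rough orientation before the CLI source, here is a minimal library-side sketch. Assumptions: the package's main entry re-exports the same MarkdownTreeParser class the CLI below imports from ../lib/markdown-parser.js, and the method names and return shapes are simply the ones that CLI calls; this is not official API documentation.

```js
// Minimal sketch (assumption: the main entry exports the MarkdownTreeParser
// class used by the CLI source below).
import { MarkdownTreeParser } from '@kayvan/markdown-tree-parser';

const parser = new MarkdownTreeParser();

// Parse markdown into a tree (parse/stringify are awaited, mirroring the CLI).
const tree = await parser.parse(
  '# Title\n\n## Install\n\nSteps here.\n\n## Usage\n\nMore here.'
);

// Headings come back as { level, text } entries.
console.log(parser.getHeadingsList(tree));

// Extract one section's subtree and serialize it back to markdown.
const section = parser.extractSection(tree, 'Install');
console.log(await parser.stringify(section));
```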
/**
* md-tree CLI - Command line interface for markdown-tree-parser
*
* A powerful CLI tool for parsing and manipulating markdown files as tree structures.
*/
import fs from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { MarkdownTreeParser } from '../lib/markdown-parser.js';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const packagePath = path.join(__dirname, '..', 'package.json');
// Constants
const PATTERNS = {
HEADING: /^(#{1,6})(\s+.*)$/,
HEADING_LEVEL_1_5: /^(#{1,5})(\s+.*)$/,
LEVEL_1_HEADING: /^# /,
LEVEL_2_HEADING: /^## /,
TOC_LINK: /\[([^\]]+)\]\(\.\/([^#)]+)(?:#[^)]*)?\)/,
LEVEL_2_TOC_ITEM: /^ {2}[-*] \[/,
EMAIL: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/,
};
const LIMITS = {
MAX_HEADING_LEVEL: 6,
MIN_HEADING_LEVEL: 1,
MAX_HEADING_LEVEL_FOR_ADJUSTMENT: 5,
};
const MESSAGES = {
FILE_NOT_FOUND: '❌ File not found',
WRITE_SUCCESS: '✅ Written to',
PROCESSING: '✅ Processing',
NO_SECTIONS_FOUND: '⚠️ No sections found',
WARNING: '⚠️ Warning',
ERROR: '❌ Error',
USAGE_LIST: '❌ Usage: md-tree list <file>',
USAGE_EXTRACT: '❌ Usage: md-tree extract <file> <heading>',
USAGE_EXTRACT_ALL: '❌ Usage: md-tree extract-all <file> [level]',
USAGE_EXPLODE: '❌ Usage: md-tree explode <file> <output-directory>',
USAGE_ASSEMBLE: '❌ Usage: md-tree assemble <directory> <output-file>',
USAGE_TREE: '❌ Usage: md-tree tree <file>',
USAGE_SEARCH: '❌ Usage: md-tree search <file> <selector>',
USAGE_STATS: '❌ Usage: md-tree stats <file>',
USAGE_TOC: '❌ Usage: md-tree toc <file>',
USAGE_CHECK_LINKS: '❌ Usage: md-tree check-links <file>',
INDEX_NOT_FOUND: 'index.md not found in',
NO_MAIN_TITLE: 'No main title found in index.md',
NO_SECTION_FILES: 'No section files found in TOC',
SECTION_ARROW: '→',
TOC_CREATED: 'Table of Contents → index.md',
};
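/**
* Command-line interface built on MarkdownTreeParser. Each handle*Command
* method validates its arguments and delegates to the matching operation
* (list, extract, extract-all, explode, assemble, tree, search, stats,
* toc, check-links).
*/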
class MarkdownCLI {
constructor() {
this.parser = new MarkdownTreeParser();
}
async getVersion() {
try {
const packageJson = JSON.parse(await fs.readFile(packagePath, 'utf-8'));
return packageJson.version;
} catch {
return 'unknown';
}
}
async readFile(filePath) {
try {
// Resolve relative paths
const resolvedPath = path.resolve(filePath);
return await fs.readFile(resolvedPath, 'utf-8');
} catch (error) {
console.error(
`${MESSAGES.ERROR} reading file ${filePath}:`,
error.message
);
process.exit(1);
}
}
async writeFile(filePath, content) {
try {
const resolvedPath = path.resolve(filePath);
await fs.writeFile(resolvedPath, content, 'utf-8');
console.log(
`${MESSAGES.WRITE_SUCCESS} ${path.relative(process.cwd(), resolvedPath)}`
);
} catch (error) {
console.error(
`${MESSAGES.ERROR} writing file ${filePath}:`,
error.message
);
process.exit(1);
}
}
/**
* Sanitize text for use in filenames or URL anchors
* @param {string} text - Text to sanitize
* @returns {string} Sanitized text
*/
sanitizeText(text) {
return text
.toLowerCase()
.replace(
/[^a-z0-9\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af\s-]/g,
''
)
.replace(/\s+/g, '-')
.replace(/-+/g, '-')
.replace(/^-|-$/g, '');
}
// Alias for backward compatibility and semantic clarity
sanitizeFilename(text) {
return this.sanitizeText(text);
}
// Alias for backward compatibility and semantic clarity
createAnchor(text) {
return this.sanitizeText(text);
}
async showUsage() {
const version = await this.getVersion();
console.log(`
📚 md-tree v${version} - Markdown Tree Parser CLI
Usage: md-tree <command> <file> [options]
Commands:
list <file> List all headings in the file
extract <file> <heading> Extract a specific section by heading text
extract-all <file> [level] Extract all sections at level (default: 2)
explode <file> <output-dir> Extract all level 2 sections and create index
assemble <dir> <output-file> Reassemble exploded document from directory
tree <file> Show the document structure as a tree
search <file> <selector> Search using CSS-like selectors
stats <file> Show document statistics
toc <file> Generate table of contents
check-links <file> Verify that links are reachable
version Show version information
help Show this help message
Options:
--output, -o <dir> Output directory for extracted files
--level, -l <number> Heading level to work with
--format, -f <json|text> Output format (default: text)
--max-level <number> Maximum heading level for TOC (default: 3)
--recursive, -r Recursively check linked markdown files
Examples:
md-tree list README.md
md-tree extract README.md "Installation"
md-tree extract-all README.md 2 --output ./sections
md-tree explode README.md ./exploded
md-tree assemble ./exploded reassembled.md
md-tree tree README.md
md-tree search README.md "heading[depth=2]"
md-tree stats README.md
md-tree toc README.md --max-level 2
For more information, visit: https://github.com/ksylvan/markdown-tree-parser
`);
}
async listHeadings(filePath, format = 'text') {
const content = await this.readFile(filePath);
const tree = await this.parser.parse(content);
const headings = this.parser.getHeadingsList(tree);
if (format === 'json') {
console.log(
JSON.stringify(
headings.map((h) => ({
level: h.level,
text: h.text,
})),
null,
2
)
);
} else {
console.log(
`\n📋 Headings in ${path.basename(filePath)} (${headings.length} total):\n`
);
headings.forEach((h, _index) => {
const indent = ' '.repeat(h.level - 1);
const icon = h.level === 1 ? '📁' : h.level === 2 ? '📄' : '📃';
console.log(`${indent}${icon} ${h.text}`);
});
}
}
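/**
* Extract a single section by heading text and print it, or write it to
* <outputDir>/<sanitized-heading>.md when an output directory is given.
* If no exact match is found, up to three similar headings are suggested.
* @param {string} filePath - Markdown file to read
* @param {string} headingText - Heading text identifying the section
* @param {string|null} [outputDir] - Optional output directory
*/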
async extractSection(filePath, headingText, outputDir = null) {
const content = await this.readFile(filePath);
const tree = await this.parser.parse(content);
const section = this.parser.extractSection(tree, headingText);
if (!section) {
console.error(
`${MESSAGES.ERROR} Section "${headingText}" not found in ${path.basename(filePath)}`
);
// Suggest similar headings
const headings = this.parser.getHeadingsList(tree);
const suggestions = headings
.filter((h) =>
h.text
.toLowerCase()
.includes(headingText.toLowerCase().substring(0, 3))
)
.slice(0, 3);
if (suggestions.length > 0) {
console.log('\n💡 Did you mean one of these?');
for (const h of suggestions) {
console.log(` - "${h.text}"`);
}
}
process.exit(1);
}
const markdown = await this.parser.stringify(section);
if (outputDir) {
const filename = `${this.sanitizeFilename(headingText)}.md`;
const outputPath = path.join(outputDir, filename);
await fs.mkdir(outputDir, { recursive: true });
await this.writeFile(outputPath, markdown);
} else {
console.log(`\n📄 Section "${headingText}":\n`);
console.log(markdown);
}
}
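/**
* Extract every section at the given heading level, printing each one or
* writing numbered files (01-<name>.md, 02-<name>.md, ...) to outputDir.
* @param {string} filePath - Markdown file to read
* @param {number} [level=2] - Heading level that starts a section
* @param {string|null} [outputDir] - Optional output directory
*/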
async extractAllSections(filePath, level = 2, outputDir = null) {
const content = await this.readFile(filePath);
const tree = await this.parser.parse(content);
const sections = this.parser.extractAllSections(tree, level);
if (sections.length === 0) {
console.log(
`${MESSAGES.NO_SECTIONS_FOUND} at level ${level} in ${path.basename(filePath)}`
);
return;
}
console.log(
`\n📚 Found ${sections.length} sections at level ${level} in ${path.basename(filePath)}:\n`
);
if (outputDir) {
await fs.mkdir(outputDir, { recursive: true });
}
for (let i = 0; i < sections.length; i++) {
const section = sections[i];
const headingText = section.headingText;
const markdown = await this.parser.stringify(section.tree);
console.log(`${i + 1}. ${headingText}`);
if (outputDir) {
const filename = `${String(i + 1).padStart(2, '0')}-${this.sanitizeFilename(headingText)}.md`;
const outputPath = path.join(outputDir, filename);
await this.writeFile(outputPath, markdown);
} else {
console.log(`\n${'─'.repeat(50)}`);
console.log(markdown);
console.log(`${'─'.repeat(50)}\n`);
}
}
if (outputDir) {
console.log(`\n✨ All sections extracted to ${outputDir}`);
}
}
async showTree(filePath) {
const content = await this.readFile(filePath);
const tree = await this.parser.parse(content);
const headings = this.parser.getHeadingsList(tree);
if (headings.length === 0) {
console.log(`📄 ${path.basename(filePath)} has no headings`);
return;
}
console.log(`\n🌳 Document structure for ${path.basename(filePath)}:\n`);
for (const heading of headings) {
const indent = ' '.repeat(heading.level - 1);
const icon =
heading.level === 1 ? '📁' : heading.level === 2 ? '📄' : '📃';
console.log(`${indent}${icon} ${heading.text}`);
}
}
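/**
* Search the parsed tree with a CSS-like node selector (e.g.
* "heading[depth=2]" or "link") and print the matches as text or JSON.
* @param {string} filePath - Markdown file to search
* @param {string} selector - Selector passed to parser.selectAll
* @param {string} [format='text'] - Output format: 'text' or 'json'
*/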
async searchNodes(filePath, selector, format = 'text') {
const content = await this.readFile(filePath);
const tree = await this.parser.parse(content);
const nodes = this.parser.selectAll(tree, selector);
if (format === 'json') {
console.log(JSON.stringify(nodes, null, 2));
} else {
console.log(
`\n🔍 Found ${nodes.length} nodes matching "${selector}" in ${path.basename(filePath)}:\n`
);
if (nodes.length === 0) {
console.log('No matches found.');
return;
}
nodes.forEach((node, index) => {
console.log(`${index + 1}. Type: ${node.type}`);
if (node.type === 'heading') {
console.log(` Text: "${this.parser.getHeadingText(node)}"`);
console.log(` Level: ${node.depth}`);
} else if (node.type === 'text') {
const preview = node.value.slice(0, 100);
console.log(
` Value: "${preview}${node.value.length > 100 ? '...' : ''}"`
);
} else if (node.type === 'link') {
console.log(` URL: ${node.url}`);
if (node.title) console.log(` Title: ${node.title}`);
}
console.log();
});
}
}
async showStats(filePath) {
const content = await this.readFile(filePath);
const tree = await this.parser.parse(content);
const stats = this.parser.getStats(tree);
console.log(`\n📊 Statistics for ${path.basename(filePath)}:\n`);
console.log(`📝 Word count: ${stats.wordCount.toLocaleString()}`);
console.log(`📋 Paragraphs: ${stats.paragraphs}`);
console.log(`📁 Headings: ${stats.headings.total}`);
if (Object.keys(stats.headings.byLevel).length > 0) {
console.log(' By level:');
for (const [level, count] of Object.entries(stats.headings.byLevel).sort(
([a], [b]) => Number.parseInt(a, 10) - Number.parseInt(b, 10)
)) {
console.log(` Level ${level}: ${count}`);
}
}
console.log(`💻 Code blocks: ${stats.codeBlocks}`);
console.log(`📌 Lists: ${stats.lists}`);
console.log(`🔗 Links: ${stats.links}`);
console.log(`🖼️ Images: ${stats.images}`);
}
async generateTOC(filePath, maxLevel = 3) {
const content = await this.readFile(filePath);
const tree = await this.parser.parse(content);
const toc = this.parser.generateTableOfContents(tree, maxLevel);
if (!toc) {
console.log(
`${MESSAGES.WARNING} No headings found in ${path.basename(filePath)} to generate TOC`
);
return;
}
console.log(`\n📚 Table of Contents for ${path.basename(filePath)}:\n`);
console.log(toc);
}
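/**
* Check every link and link definition in a file: http(s) URLs are probed
* with a HEAD request, local paths are checked on disk, and mailto/anchor
* links are skipped. With recursive=true, linked .md files are checked too;
* the visited set prevents re-checking files and following cycles.
* @param {string} filePath - Markdown file to check
* @param {boolean} [recursive=false] - Follow local markdown links
* @param {Set<string>} [visited] - Absolute paths already checked
*/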
async checkLinks(filePath, recursive = false, visited = new Set()) {
const resolvedPath = path.resolve(filePath);
if (visited.has(resolvedPath)) return;
visited.add(resolvedPath);
const content = await this.readFile(resolvedPath);
const tree = await this.parser.parse(content);
const links = this.parser.selectAll(tree, 'link');
const definitions = this.parser.selectAll(tree, 'definition');
const allUrls = [];
for (const link of links) {
allUrls.push(link.url);
}
for (const definition of definitions) {
allUrls.push(definition.url);
}
const uniqueUrls = new Set(allUrls);
console.log(
`\n🔗 Checking ${uniqueUrls.size} unique URLs in ${path.basename(resolvedPath)}:`
);
for (const url of uniqueUrls) {
if (!url || url.startsWith('#')) {
continue;
}
// Show email links but mark as skipped
if (url.startsWith('mailto:') || PATTERNS.EMAIL.test(url)) {
console.log(`⏭️ ${url} (email - skipped)`);
continue;
}
if (/^https?:\/\//i.test(url)) {
try {
const res = await globalThis.fetch(url, { method: 'HEAD' });
if (res.ok) {
console.log(`✅ ${url}`);
} else {
console.log(`❌ ${url} (${res.status})`);
}
} catch (err) {
console.log(`❌ ${url} (${err.message})`);
}
} else {
const target = path.resolve(
path.dirname(resolvedPath),
url.split('#')[0]
);
try {
await fs.access(target);
console.log(`✅ ${url}`);
if (recursive && /\.md$/i.test(target)) {
await this.checkLinks(target, true, visited);
}
} catch {
console.log(`❌ ${url} (file not found)`);
}
}
}
}
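/**
* Parse process.argv into { command, args, options }. Positional arguments
* (including the command itself) are collected in args; recognized flags
* populate options and are dropped from the positional list.
* @returns {{command: string, args: string[], options: object}}
*/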
parseArgs() {
const args = process.argv.slice(2);
if (args.length === 0) {
return { command: 'help', args: [], options: {} };
}
const command = args[0];
const options = {
output: null,
level: 2,
format: 'text',
maxLevel: 3,
recursive: false,
};
// Parse flags
const filteredArgs = [];
for (let i = 0; i < args.length; i++) {
const arg = args[i];
if (arg === '--output' || arg === '-o') {
options.output = args[i + 1];
i++; // skip next arg
} else if (arg === '--level' || arg === '-l') {
options.level = Number.parseInt(args[i + 1], 10) || 2;
i++; // skip next arg
} else if (arg === '--format' || arg === '-f') {
options.format = args[i + 1] || 'text';
i++; // skip next arg
} else if (arg === '--max-level') {
options.maxLevel = Number.parseInt(args[i + 1], 10) || 3;
i++; // skip next arg
} else if (arg === '--recursive' || arg === '-r') {
options.recursive = true;
} else if (!arg.startsWith('-')) {
filteredArgs.push(arg);
}
}
return { command, args: filteredArgs, options };
}
// Command handlers
async handleVersionCommand() {
const version = await this.getVersion();
console.log(`md-tree v${version}`);
}
async handleHelpCommand() {
await this.showUsage();
}
async handleListCommand(args, options) {
if (args.length < 2) {
console.error(MESSAGES.USAGE_LIST);
process.exit(1);
}
await this.listHeadings(args[1], options.format);
}
async handleExtractCommand(args, options) {
if (args.length < 3) {
console.error(MESSAGES.USAGE_EXTRACT);
process.exit(1);
}
await this.extractSection(args[1], args[2], options.output);
}
async handleExtractAllCommand(args, options) {
if (args.length < 2) {
console.error(MESSAGES.USAGE_EXTRACT_ALL);
process.exit(1);
}
const level = args[2] ? Number.parseInt(args[2], 10) : options.level;
await this.extractAllSections(args[1], level, options.output);
}
async handleExplodeCommand(args) {
if (args.length < 3) {
console.error(MESSAGES.USAGE_EXPLODE);
process.exit(1);
}
await this.explodeDocument(args[1], args[2]);
}
async handleAssembleCommand(args) {
if (args.length < 3) {
console.error(MESSAGES.USAGE_ASSEMBLE);
process.exit(1);
}
await this.assembleDocument(args[1], args[2]);
}
async handleTreeCommand(args) {
if (args.length < 2) {
console.error(MESSAGES.USAGE_TREE);
process.exit(1);
}
await this.showTree(args[1]);
}
async handleSearchCommand(args, options) {
if (args.length < 3) {
console.error(MESSAGES.USAGE_SEARCH);
process.exit(1);
}
await this.searchNodes(args[1], args[2], options.format);
}
async handleStatsCommand(args) {
if (args.length < 2) {
console.error(MESSAGES.USAGE_STATS);
process.exit(1);
}
await this.showStats(args[1]);
}
async handleTocCommand(args, options) {
if (args.length < 2) {
console.error(MESSAGES.USAGE_TOC);
process.exit(1);
}
await this.generateTOC(args[1], options.maxLevel);
}
async handleCheckLinksCommand(args, options) {
if (args.length < 2) {
console.error(MESSAGES.USAGE_CHECK_LINKS);
process.exit(1);
}
await this.checkLinks(args[1], options.recursive);
}
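/**
* Entry point: parse the CLI arguments, dispatch to the matching command
* handler, and report errors (with a stack trace when DEBUG is set).
*/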
async run() {
const { command, args, options } = this.parseArgs();
try {
switch (command) {
case 'version':
await this.handleVersionCommand();
break;
case 'help':
await this.handleHelpCommand();
break;
case 'list':
await this.handleListCommand(args, options);
break;
case 'extract':
await this.handleExtractCommand(args, options);
break;
case 'extract-all':
await this.handleExtractAllCommand(args, options);
break;
case 'explode':
await this.handleExplodeCommand(args);
break;
case 'assemble':
await this.handleAssembleCommand(args);
break;
case 'tree':
await this.handleTreeCommand(args);
break;
case 'search':
await this.handleSearchCommand(args, options);
break;
case 'stats':
await this.handleStatsCommand(args);
break;
case 'toc':
await this.handleTocCommand(args, options);
break;
case 'check-links':
await this.handleCheckLinksCommand(args, options);
break;
default:
console.error(`${MESSAGES.ERROR} Unknown command: ${command}`);
console.log('Run "md-tree help" for usage information.');
process.exit(1);
}
} catch (error) {
console.error(`${MESSAGES.ERROR}:`, error.message);
if (process.env.DEBUG) {
console.error(error.stack);
}
process.exit(1);
}
}
async explodeDocument(filePath, outputDir) {
// Use the text-based approach for perfect round-trip compatibility
return await this.explodeDocumentTextBased(filePath, outputDir);
}
// Text-based explode that preserves original formatting exactly
async explodeDocumentTextBased(filePath, outputDir) {
const content = await this.readFile(filePath);
const lines = content.split('\n');
// Find all level 2 headings and their positions
const sections = [];
let currentSection = null;
let inCodeBlock = false;
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
const trimmed = line.trim();
// Track fenced code blocks and ignore headings within them
if (trimmed.startsWith('```') || trimmed.startsWith('~~~')) {
if (currentSection) {
currentSection.lines.push(line);
}
inCodeBlock = !inCodeBlock;
continue;
}
if (inCodeBlock) {
if (currentSection) {
currentSection.lines.push(line);
}
continue;
}
// Check for main title (level 1)
if (line.match(PATTERNS.LEVEL_1_HEADING)) {
if (currentSection) {
currentSection.endLine = i - 1;
sections.push(currentSection);
}
currentSection = null;
continue;
}
// Check for level 2 heading (section start)
if (line.match(PATTERNS.LEVEL_2_HEADING)) {
if (currentSection) {
currentSection.endLine = i - 1;
sections.push(currentSection);
}
currentSection = {
headingText: line.replace(PATTERNS.LEVEL_2_HEADING, ''),
startLine: i,
endLine: null,
lines: [],
};
continue;
}
// Add line to current section if we're in one
if (currentSection) {
currentSection.lines.push(line);
}
}
// Don't forget the last section
if (currentSection) {
currentSection.endLine = lines.length - 1;
sections.push(currentSection);
}
if (sections.length === 0) {
console.log(
`${MESSAGES.NO_SECTIONS_FOUND} at level 2 in ${path.basename(filePath)}`
);
return;
}
// Create output directory
await fs.mkdir(outputDir, { recursive: true });
console.log(
`\n📚 Exploding ${sections.length} sections from ${path.basename(filePath)} to ${outputDir}:\n`
);
// Keep track of section filenames for index generation
const sectionFiles = [];
// Extract each section to its own file
for (const section of sections) {
const headingText = section.headingText;
// Make main section heading Level 1, and decrement all subsection headings by one level
const decrementedLines = section.lines.map((line) => {
// Check if line is a heading (starts with #)
const headingMatch = line.match(/^(#{2,6})(\s+.*)$/);
if (headingMatch) {
const [, hashes, rest] = headingMatch;
// Remove one # to decrease the level (level 3 becomes level 2, etc.)
return hashes.slice(1) + rest;
}
return line;
});
const sectionLines = [`# ${headingText}`, ...decrementedLines];
const adjustedContent = sectionLines.join('\n');
// Generate filename without numbered prefix
const filename = `${this.sanitizeFilename(headingText)}.md`;
const outputPath = path.join(outputDir, filename);
sectionFiles.push({
filename,
headingText,
});
await this.writeFile(outputPath, adjustedContent);
console.log(
`${MESSAGES.PROCESSING} ${headingText} ${MESSAGES.SECTION_ARROW} ${filename}`
);
}
// Parse content with AST to generate rich TOC with all subsections
const tree = await this.parser.parse(content);
const indexContent = await this.generateIndexContentWithSubsections(
tree,
sectionFiles
);
const indexPath = path.join(outputDir, 'index.md');
await this.writeFile(indexPath, indexContent);
console.log(`${MESSAGES.PROCESSING} ${MESSAGES.TOC_CREATED}`);
console.log(
`\n✨ Document exploded to ${outputDir} (${sectionFiles.length + 1} files)`
);
}
async generateIndexContent(tree, sectionFiles) {
// Use the enhanced AST-based approach to include all subsections
return await this.generateIndexContentWithSubsections(tree, sectionFiles);
}
// Enhanced index generation with all subsections using AST
async generateIndexContentWithSubsections(tree, sectionFiles) {
const headings = this.parser.getHeadingsList(tree);
const mainTitle = headings.find((h) => h.level === 1);
if (!mainTitle) {
return await this.generateIndexContentTextBased(
await this.parser.stringify(tree),
sectionFiles
);
}
// Create a map of section names to filenames for quick lookup
const sectionMap = new Map();
for (const file of sectionFiles) {
sectionMap.set(file.headingText.toLowerCase(), file.filename);
}
// Start with title and TOC heading
let toc = `# ${mainTitle.text}\n\n## Table of Contents\n\n`;
// Add the main title link
toc += `- [${mainTitle.text}](#table-of-contents)\n`;
// Process all headings to create nested TOC
let currentLevel2Filename = null;
for (const heading of headings) {
// Skip the main title (level 1)
if (heading.level === 1) {
continue;
}
if (heading.level === 2) {
// This is a main section
currentLevel2Filename = sectionMap.get(heading.text.toLowerCase());
if (currentLevel2Filename) {
toc += ` - [${heading.text}](./${currentLevel2Filename})\n`;
} else {
toc += ` - [${heading.text}](#${this.createAnchor(heading.text)})\n`;
}
} else if (heading.level > 2 && currentLevel2Filename) {
// This is a subsection within a level 2 section
const indent = ' '.repeat(heading.level - 1);
const anchor = this.createAnchor(heading.text);
toc += `${indent}- [${heading.text}](./${currentLevel2Filename}#${anchor})\n`;
}
}
return toc;
}
/**
* Adjust heading levels in markdown content
* @param {string} content - Markdown content
* @param {number} adjustment - Levels to add: +1 inserts a '#' (deepening each heading, e.g. H2 -> H3); -1 removes one
* @returns {string} Content with adjusted heading levels
*/
adjustHeadingLevels(content, adjustment) {
const lines = content.split('\n');
let inCodeBlock = false;
const adjustedLines = lines.map((line) => {
// Check for code block boundaries (``` or ~~~)
if (line.trim().startsWith('```') || line.trim().startsWith('~~~')) {
inCodeBlock = !inCodeBlock;
return line;
}
// Skip heading adjustment if we're inside a code block
if (inCodeBlock) {
return line;
}
const headingMatch = line.match(PATTERNS.HEADING_LEVEL_1_5);
if (headingMatch) {
const [, hashes, rest] = headingMatch;
const currentLevel = hashes.length;
const newLevel = currentLevel + adjustment;
// Ensure we stay within valid heading level bounds (1-6)
if (
newLevel >= LIMITS.MIN_HEADING_LEVEL &&
newLevel <= LIMITS.MAX_HEADING_LEVEL
) {
return '#'.repeat(newLevel) + rest;
}
}
return line;
});
return adjustedLines.join('\n');
}
// Convenience methods for backward compatibility
decrementAllHeadingLevelsInText(content) {
return this.adjustHeadingLevels(content, -1); // Remove one '#' (e.g. H3 -> H2)
}
incrementHeadingLevelsInText(content) {
return this.adjustHeadingLevels(content, 1); // Add one '#' (e.g. H2 -> H3)
}
// Generate index content preserving original spacing
async generateIndexContentTextBased(originalContent, sectionFiles) {
const lines = originalContent.split('\n');
// Find the main title
let mainTitle = 'Table of Contents';
for (const line of lines) {
if (line.match(PATTERNS.LEVEL_1_HEADING)) {
mainTitle = line.replace(PATTERNS.LEVEL_1_HEADING, '');
break;
}
}
// Create a map of section names to filenames for quick lookup
const sectionMap = new Map();
for (const file of sectionFiles) {
sectionMap.set(file.headingText.toLowerCase(), file.filename);
}
// Start with title and TOC heading, preserving original spacing
let toc = `# ${mainTitle}\n\n## Table of Contents\n\n`;
// Add the main title link
toc += `- [${mainTitle}](#table-of-contents)\n`;
// Add links for each section
for (const file of sectionFiles) {
toc += ` - [${file.headingText}](./${file.filename})\n`;
}
return toc;
}
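/**
* Find the closest level 2 heading that precedes targetHeading in the
* flattened headings list, stopping at any intervening level 1 heading.
* @param {Array} headings - Headings as returned by getHeadingsList
* @param {object} targetHeading - Entry whose parent section is wanted
* @returns {object|null} The parent level 2 heading, or null if none
*/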
findParentLevel2Heading(headings, targetHeading) {
const targetIndex = headings.indexOf(targetHeading);
// Look backwards for the most recent level 2 heading
for (let i = targetIndex - 1; i >= 0; i--) {
if (headings[i].level === 2) {
return headings[i];
}
// If we hit a level 1 heading, stop looking
if (headings[i].level === 1) {
break;
}
}
return null;
}
// Helper method to decrement all heading levels in a tree by 1
decrementHeadingLevels(tree) {
if (!tree || !tree.children) return tree;
// Create a deep copy to avoid modifying the original tree
const clonedTree = JSON.parse(JSON.stringify(tree));
const decrementNode = (node) => {
if (node.type === 'heading' && node.depth > 1) {
node.depth = node.depth - 1;
}
if (node.children) {
node.children.forEach(decrementNode);
}
};
if (clonedTree.children) {
clonedTree.children.forEach(decrementNode);
}
return clonedTree;
}
// Helper method to increment all heading levels in a tree by 1
incrementHeadingLevels(tree) {
if (!tree || !tree.children) return tree;
// Create a deep copy to avoid modifying the original tree
const clonedTree = JSON.parse(JSON.stringify(tree));
const incrementNode = (node) => {
if (node.type === 'heading' && node.depth < 6) {
node.depth = node.depth + 1;
}
if (node.children) {
node.children.forEach(incrementNode);
}
};
if (clonedTree.children) {
clonedTree.children.forEach(incrementNode);
}
return clonedTree;
}
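/**
* Rebuild a single document from a directory produced by `explode`: read
* index.md for the main title and section order, demote each section file's
* headings by one level, and concatenate everything into outputFile.
* @param {string} inputDir - Directory containing index.md and section files
* @param {string} outputFile - Path of the reassembled markdown file
*/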
async assembleDocument(inputDir, outputFile) {
const indexPath = path.join(inputDir, 'index.md');
// Check if index.md exists
try {
await fs.access(indexPath);
} catch {
console.error(
`${MESSAGES.ERROR} ${MESSAGES.INDEX_NOT_FOUND} ${inputDir}`
);
process.exit(1);
}
const indexContent = await this.readFile(indexPath);
const indexTree = await this.parser.parse(indexContent);
// Extract the main title and get the list of section files from TOC
const headings = this.parser.getHeadingsList(indexTree);
const mainTitle = headings.find((h) => h.level === 1);
if (!mainTitle) {
console.error(`${MESSAGES.ERROR} ${MESSAGES.NO_MAIN_TITLE}`);
process.exit(1);
}
console.log(`\n📚 Assembling document: ${mainTitle.text}`);
// Parse the TOC to extract section file references
const sectionFiles = await this.extractSectionFilesFromTOC(indexTree);
if (sectionFiles.length === 0) {
console.error(`${MESSAGES.ERROR} ${MESSAGES.NO_SECTION_FILES}`);
process.exit(1);
}
console.log(`📖 Found ${sectionFiles.length} sections to assemble`);
// Start building the reassembled document
let assembledContent = `# ${mainTitle.text}\n`;
// Process each section file
for (const sectionFile of sectionFiles) {
console.log(`${MESSAGES.PROCESSING} ${sectionFile.filename}...`);
const filePath = path.join(inputDir, sectionFile.filename);
try {
const sectionContent = await this.readFile(filePath);
// Increment all heading levels back up to match original document structure
const adjustedContent =
this.incrementHeadingLevelsInText(sectionContent);
// Add the section content:
// - After main title: blank line then content (original has blank line after title)
// - Between sections: direct concatenation (original has no spacing between sections)
assembledContent += `\n${adjustedContent}`;
} catch {
console.error(
`${MESSAGES.WARNING}: Could not read ${sectionFile.filename}, skipping...`
);
}
}
// Write the assembled document
await this.writeFile(outputFile, assembledContent);
console.log(`\n✨ Document assembled to ${outputFile}`);
}
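/**
* Convert the parsed index.md tree back to markdown and collect the section
* files referenced by its table of contents, in order. Only top-level TOC
* items (two-space indent) that link to ./<file> are included; deeper items
* and pure anchors are ignored.
* @param {object} indexTree - Parsed tree of index.md
* @returns {Promise<Array<{filename: string, title: string}>>}
*/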
async extractSectionFilesFromTOC(indexTree) {
// Convert the tree back to markdown to parse the TOC links
const indexMarkdown = await this.parser.stringify(indexTree);
const lines = indexMarkdown.split('\n');
const sectionFiles = [];
const processedFiles = new Set();
for (const line of lines) {
// Look for TOC lines that reference files (not just anchors)
const match = line.match(PATTERNS.TOC_LINK);
if (match) {
const [, linkText, filename] = match;
// Only include level 2 sections (main sections, not sub-sections)
// Level 2 items have exactly 2 spaces before the dash (are children of main heading)
// Level 3+ items have 4+ spaces (are nested deeper)
if (line.match(PATTERNS.LEVEL_2_TOC_ITEM) && !processedFiles.has(filename)) {
sectionFiles.push({
filename,
title: linkText,
});
processedFiles.add(filename);
}
}
}
return sectionFiles;
}
}
// Export the class for testing
export { MarkdownCLI };
const cli = new MarkdownCLI();
cli.run();