/*
 * markmv
 * Version:
 * TypeScript CLI for markdown file operations with intelligent link refactoring
 * 391 lines • 16.8 kB
 * JavaScript
 */
import { basename, dirname, join } from 'node:path';
import { HeaderBasedSplitStrategy, LineBasedSplitStrategy, ManualSplitStrategy, SizeBasedSplitStrategy, } from '../strategies/split-strategies.js';
import { FileUtils } from '../utils/file-utils.js';
import { PathUtils } from '../utils/path-utils.js';
import { TransactionManager } from '../utils/transaction-manager.js';
import { LinkParser } from './link-parser.js';
/**
* Splits large markdown files into smaller, manageable sections using various strategies.
*
* The ContentSplitter provides intelligent content division with support for header-based,
* size-based, manual marker-based, and line-based splitting strategies. It handles link
* redistribution, maintains content integrity, and ensures proper cross-references between the
* resulting files.
*
* @category Core
*
* @example
* Header-based splitting
* ```typescript
* const splitter = new ContentSplitter();
* const result = await splitter.splitFile('large-guide.md', {
* strategy: 'headers',
* headerLevel: 2,
* outputDir: './split-guides/',
* preserveLinks: true
* });
*
* console.log(`Created ${result.createdFiles.length} files`);
* ```
*
* @example
* Size-based splitting
* ```typescript
* const splitter = new ContentSplitter();
* const result = await splitter.splitFile('large-document.md', {
* strategy: 'size',
* maxSize: '50KB',
* outputDir: './chunks/',
* dryRun: true // Preview without creating files
* });
* ```
*/
export class ContentSplitter {
  /** Parser used to read the links and dependencies of markdown files. */
  linkParser = new LinkParser();

  /**
   * Splits a markdown file into multiple smaller files using the specified strategy.
   *
   * The source file is read and parsed, divided into sections by the chosen strategy, and the
   * links inside each section are rewritten for the section's new location. Files that reference
   * the source file are updated as well. Unless `dryRun` is set, all writes are queued on a
   * TransactionManager and executed together at the end.
   *
   * Failures (including an invalid `options` value) are reported through the returned result
   * object (`success: false` plus `errors`) instead of rejecting the promise.
   *
   * @example
   * Basic header splitting
   * ```typescript
   * const result = await splitter.splitFile('documentation.md', {
   *   strategy: 'headers',
   *   headerLevel: 1, // Split on H1 headers
   *   outputDir: './docs-sections/',
   *   preserveLinks: true
   * });
   * ```
   *
   * @example
   * Manual marker splitting
   * ```typescript
   * const result = await splitter.splitFile('article.md', {
   *   strategy: 'manual',
   *   markers: ['<!-- split -->', '---split---'],
   *   outputDir: './article-parts/'
   * });
   * ```
   *
   * @param sourceFilePath - Path to the markdown file to split
   * @param options - Configuration options for the split operation. Read here: `strategy`
   *   (default `'headers'`), `outputDir` (default: directory of the source file), `dryRun`
   *   (default `false`) and `verbose` (default `false`). The whole object is also forwarded to
   *   the split strategy.
   *
   * @returns Promise resolving to operation result with details of created files
   */
  async splitFile(sourceFilePath, options = {}) {
    try {
      // Destructure inside the try block so that an unusable `options` value (e.g. null) is
      // reported through the result object like every other failure.
      const { strategy = 'headers', outputDir, dryRun = false, verbose = false } = options;

      // Validate input
      if (!(await FileUtils.exists(sourceFilePath))) {
        return createFailureResult([`Source file does not exist: ${sourceFilePath}`]);
      }
      if (!PathUtils.isMarkdownFile(sourceFilePath)) {
        return createFailureResult(['Source file must be a markdown file']);
      }

      // Read and parse the source file
      const content = await FileUtils.readTextFile(sourceFilePath);
      const parsedFile = await this.linkParser.parseFile(sourceFilePath);
      if (verbose) {
        console.log(`Parsing file: ${sourceFilePath}`);
        console.log(`Found ${parsedFile.links.length} links`);
      }

      // Sections are written next to the source file unless an output directory was given
      const outputDirectory = outputDir || dirname(sourceFilePath);
      const sourceFilename = basename(sourceFilePath);

      // Create the strategy and perform the split
      const splitStrategy = this.createSplitStrategy(strategy, {
        ...options,
        outputDir: outputDirectory,
      });
      const splitResult = await splitStrategy.split(content, sourceFilename);
      if (splitResult.errors.length > 0) {
        return createFailureResult(splitResult.errors, splitResult.warnings);
      }
      if (splitResult.sections.length === 0) {
        return createFailureResult(['No sections were created during split'], splitResult.warnings);
      }

      // Redistribute links across sections
      const redistributionResult = await this.redistributeLinks(splitResult, parsedFile, outputDirectory);
      if (verbose) {
        console.log(`Split into ${redistributionResult.updatedSections.length} sections`);
        console.log(`${redistributionResult.externalLinkUpdates.length} external links need updating`);
      }

      // Prepare transaction
      const transaction = new TransactionManager({
        createBackups: !dryRun,
        continueOnError: false,
      });
      const changes = [];
      const createdFiles = [];
      const modifiedFiles = [];
      // Link redistribution problems are non-fatal and therefore surface as warnings
      const warnings = [...splitResult.warnings, ...redistributionResult.errors];

      // Plan section file creation
      for (const section of redistributionResult.updatedSections) {
        const filePath = join(outputDirectory, section.filename);
        createdFiles.push(filePath);
        changes.push({
          type: 'file-created',
          filePath,
          newValue: section.content,
        });
        if (!dryRun) {
          transaction.addFileCreate(filePath, section.content, `Create split section: ${section.title}`);
        }
      }

      // Plan original file update (if there's remaining content)
      if (splitResult.remainingContent) {
        modifiedFiles.push(sourceFilePath);
        changes.push({
          type: 'content-modified',
          filePath: sourceFilePath,
          newValue: splitResult.remainingContent,
        });
        if (!dryRun) {
          transaction.addContentUpdate(sourceFilePath, splitResult.remainingContent, 'Update original file with remaining content');
        }
      }

      // Handle external file updates (files that link to the split file)
      const externalFiles = await this.findExternalReferences(sourceFilePath);
      for (const externalFile of externalFiles) {
        const updatedContent = await this.updateExternalFileLinks(externalFile, sourceFilePath, redistributionResult.updatedSections, outputDirectory);
        // Only record the file when the rewrite actually changed its content
        if (updatedContent !== (await FileUtils.readTextFile(externalFile))) {
          modifiedFiles.push(externalFile);
          changes.push({
            type: 'link-updated',
            filePath: externalFile,
          });
          if (!dryRun) {
            transaction.addContentUpdate(externalFile, updatedContent, 'Update links to split sections');
          }
        }
      }

      // The same report is returned for a dry run and for a successfully executed transaction
      const successResult = {
        success: true,
        modifiedFiles: Array.from(new Set(modifiedFiles)),
        createdFiles,
        deletedFiles: [],
        errors: [],
        warnings,
        changes,
      };
      if (dryRun) {
        return successResult;
      }

      // Execute the transaction
      const executionResult = await transaction.execute();
      if (!executionResult.success) {
        return createFailureResult(executionResult.errors, warnings);
      }
      return successResult;
    }
    catch (error) {
      return createFailureResult([`Split operation failed: ${error}`]);
    }
  }

  /**
   * Instantiates the split strategy registered under the given name.
   *
   * @param strategy - One of `'headers'`, `'size'`, `'manual'` or `'lines'`
   * @param options - Options object handed to the strategy constructor
   *
   * @returns The strategy instance
   * @throws Error when the strategy name is not recognised
   */
  createSplitStrategy(strategy, options) {
    switch (strategy) {
      case 'headers':
        return new HeaderBasedSplitStrategy(options);
      case 'size':
        return new SizeBasedSplitStrategy(options);
      case 'manual':
        return new ManualSplitStrategy(options);
      case 'lines':
        return new LineBasedSplitStrategy(options);
      default:
        throw new Error(`Unknown split strategy: ${strategy}`);
    }
  }

  /**
   * Redistribute links across split sections.
   *
   * For every section, the `internal` and `claude-import` links located inside the section's line
   * range are rewritten so they stay valid from the section's new file path. Failures are
   * collected in `errors` instead of being thrown; a section that cannot be processed is passed
   * through unchanged.
   *
   * @param splitResult - Strategy output; its `sections` are processed in order
   * @param originalFile - Parsed source file providing `links` and `filePath`
   * @param outputDirectory - Directory the section files will be written to
   *
   * @returns Object with `updatedSections`, `externalLinkUpdates` (currently always empty) and
   *   `errors`
   */
  async redistributeLinks(splitResult, originalFile, outputDirectory) {
    const updatedSections = [];
    const externalLinkUpdates = [];
    const errors = [];
    for (const section of splitResult.sections) {
      try {
        const sectionFilePath = join(outputDirectory, section.filename);
        // The +1 offsets suggest link.line is 1-based while startLine/endLine are 0-based
        // NOTE(review): confirm against LinkParser and the split strategies
        const sectionLinks = originalFile.links.filter((link) => link.line >= section.startLine + 1 && link.line <= section.endLine + 1);
        const lines = section.content.split('\n');
        for (const link of sectionLinks) {
          if (link.type !== 'internal' && link.type !== 'claude-import') {
            continue;
          }
          try {
            const newHref = this.updateLinkForNewLocation(link, originalFile.filePath, sectionFilePath);
            if (newHref !== link.href) {
              // Translate the link's line in the original file to an index into this section
              const relativeLine = link.line - section.startLine - 1;
              if (relativeLine >= 0 && relativeLine < lines.length) {
                lines[relativeLine] = this.replaceLinkInLine(lines[relativeLine], link, newHref);
              }
            }
          }
          catch (error) {
            errors.push(`Failed to update link in section ${section.title}: ${error}`);
          }
        }
        updatedSections.push({
          ...section,
          content: lines.join('\n'),
        });
      }
      catch (error) {
        errors.push(`Failed to process section ${section.title}: ${error}`);
        updatedSections.push(section);
      }
    }
    return {
      updatedSections,
      externalLinkUpdates,
      errors,
    };
  }

  /**
   * Computes the href a link needs after its containing file moves.
   *
   * @param link - Parsed link; only `type` and `href` are read
   * @param originalFilePath - Path of the file the link currently lives in
   * @param newFilePath - Path of the file the link will live in
   *
   * @returns The adjusted href for `claude-import` and `internal` links; the unchanged href for
   *   every other link type
   */
  updateLinkForNewLocation(link, originalFilePath, newFilePath) {
    if (link.type === 'claude-import') {
      return PathUtils.updateClaudeImportPath(link.href, originalFilePath, newFilePath);
    }
    if (link.type === 'internal') {
      return PathUtils.updateRelativePath(link.href, originalFilePath, newFilePath);
    }
    return link.href;
  }

  /**
   * Replaces the target of a link inside a single line of markdown.
   *
   * For `claude-import` links the first occurrence of `@<href>` is replaced. For every other link
   * type, all inline links `[text](href "optional title")` on the line whose target equals
   * `link.href` are rewritten; link text and title are preserved.
   *
   * @param line - The line of text containing the link
   * @param link - Parsed link; only `type` and `href` are read
   * @param newHref - Replacement target, inserted verbatim
   *
   * @returns The line with the link target replaced
   */
  replaceLinkInLine(line, link, newHref) {
    // Replacer functions are used throughout: with a replacement *string*, sequences such as
    // "$&" or "$1" inside newHref would be expanded by String.prototype.replace.
    if (link.type === 'claude-import') {
      return line.replace(`@${link.href}`, () => `@${newHref}`);
    }
    // For regular markdown links: escape the href so it is matched literally
    const escapedHref = link.href.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const linkRegex = new RegExp(`\\[([^\\]]*)\\]\\(\\s*${escapedHref}(\\s+"[^"]*")?\\s*\\)`, 'g');
    // `title` is undefined when the link has no title part, hence the default
    return line.replace(linkRegex, (_match, text, title = '') => `[${text}](${newHref}${title})`);
  }

  /**
   * Find files that reference the source file.
   *
   * Only markdown files found via `FileUtils.findMarkdownFiles` starting at the source file's own
   * directory are considered. Files that cannot be parsed are skipped on purpose (best effort).
   *
   * @param sourceFilePath - Path of the file being split
   *
   * @returns Paths of files whose parsed `dependencies` include `sourceFilePath`; an empty array
   *   when the search itself fails
   */
  async findExternalReferences(sourceFilePath) {
    try {
      const projectRoot = dirname(sourceFilePath);
      // NOTE(review): the second argument is presumably a "recursive" flag; confirm in FileUtils
      const markdownFiles = await FileUtils.findMarkdownFiles(projectRoot, true);
      const referencingFiles = [];
      for (const filePath of markdownFiles) {
        if (filePath === sourceFilePath) {
          continue;
        }
        try {
          const parsedFile = await this.linkParser.parseFile(filePath);
          if (parsedFile.dependencies.includes(sourceFilePath)) {
            referencingFiles.push(filePath);
          }
        }
        catch {
          // Ignore files that can't be parsed
        }
      }
      return referencingFiles;
    }
    catch (error) {
      console.warn(`Failed to find external references: ${error}`);
      return [];
    }
  }

  /**
   * Update external files that reference the split file.
   *
   * Nothing is written here; the updated text is returned so the caller can decide what to do
   * with it.
   *
   * @param externalFilePath - File containing links to the original file
   * @param originalFilePath - Path of the file that was split
   * @param sections - Sections produced by the split; only the first one's `filename` is used
   * @param outputDirectory - Directory the section files are written to
   *
   * @returns The external file's content with matching links retargeted. If the update fails, a
   *   warning is logged and the file is read again and returned as-is.
   */
  async updateExternalFileLinks(externalFilePath, originalFilePath, sections, outputDirectory) {
    try {
      const content = await FileUtils.readTextFile(externalFilePath);
      const parsedFile = await this.linkParser.parseFile(externalFilePath);
      // Find links that point to the original file
      const linksToUpdate = parsedFile.links.filter((link) => link.resolvedPath === originalFilePath);
      if (sections.length === 0 || linksToUpdate.length === 0) {
        return content;
      }
      // For now, update all links to point to the first section
      // In a more sophisticated implementation, we could analyze the link context
      // to determine which section it should point to
      const firstSectionPath = join(outputDirectory, sections[0].filename);
      // The new target is identical for every link in this file, so compute it once
      let newHref = PathUtils.makeRelative(firstSectionPath, dirname(externalFilePath));
      // Ensure relative paths start with ./ for markdown compatibility
      if (!newHref.startsWith('./') && !newHref.startsWith('../') && !newHref.startsWith('/')) {
        newHref = `./${newHref}`;
      }
      const lines = content.split('\n');
      for (const link of linksToUpdate) {
        if (newHref === link.href) {
          continue;
        }
        // lineIndex converts link.line into an index into `lines`
        const lineIndex = link.line - 1;
        if (lineIndex >= 0 && lineIndex < lines.length) {
          lines[lineIndex] = this.replaceLinkInLine(lines[lineIndex], link, newHref);
        }
      }
      return lines.join('\n');
    }
    catch (error) {
      console.warn(`Failed to update external file ${externalFilePath}: ${error}`);
      return FileUtils.readTextFile(externalFilePath);
    }
  }
}

/**
 * Builds the result object reported for a failed split operation.
 *
 * @param errors - Error messages describing the failure
 * @param warnings - Warnings collected before the failure
 *
 * @returns Result with `success: false` and empty file/change lists
 */
function createFailureResult(errors, warnings = []) {
  return {
    success: false,
    modifiedFiles: [],
    createdFiles: [],
    deletedFiles: [],
    errors,
    warnings,
    changes: [],
  };
}
//# sourceMappingURL=content-splitter.js.map