UNPKG

repomix

Version:

A tool to pack repository contents into a single file for AI consumption

299 lines (298 loc) 14 kB
import fs from 'node:fs/promises';
import path from 'node:path';
import Handlebars from 'handlebars';
import { RepomixError } from '../../shared/errorHandle.js';
import { listDirectories, listFiles, searchFiles } from '../file/fileSearch.js';
import { generateTreeString, generateTreeStringWithRoots } from '../file/fileTreeGenerate.js';
import { sortOutputFiles } from './outputSort.js';
import {
  generateHeader,
  generateSummaryFileFormat,
  generateSummaryFileFormatJson,
  generateSummaryNotes,
  generateSummaryPurpose,
  generateSummaryUsageGuidelines,
} from './outputStyleDecorate.js';
import { getMarkdownTemplate } from './outputStyles/markdownStyle.js';
import { getPlainTemplate } from './outputStyles/plainStyle.js';
import { getXmlTemplate } from './outputStyles/xmlStyle.js';

/** Compiled Handlebars templates, keyed by output style, so each style is compiled at most once. */
const compiledTemplateCache = new Map();

/**
 * Builds the options argument for a wrapping RepomixError.
 *
 * @param {unknown} error - The value caught by a `catch` clause.
 * @returns {{ cause: Error } | undefined} `{ cause }` when `error` is an Error instance, otherwise `undefined`.
 */
const toCauseOption = (error) => (error instanceof Error ? { cause: error } : undefined);

/**
 * Returns the compiled Handlebars template for an output style, compiling and caching it on first use.
 *
 * @param {string} style - One of 'xml', 'markdown' or 'plain'.
 * @returns {Function} The compiled template function.
 * @throws {RepomixError} When `style` has no Handlebars template.
 */
const getCompiledTemplate = (style) => {
  const cached = compiledTemplateCache.get(style);
  if (cached) {
    return cached;
  }

  let template;
  switch (style) {
    case 'xml':
      template = getXmlTemplate();
      break;
    case 'markdown':
      template = getMarkdownTemplate();
      break;
    case 'plain':
      template = getPlainTemplate();
      break;
    default:
      throw new RepomixError(`Unsupported output style for handlebars template: ${style}`);
  }

  const compiled = Handlebars.compile(template);
  compiledTemplateCache.set(style, compiled);
  return compiled;
};

/**
 * Picks a backtick fence that is longer than any run of backticks found in the given files.
 *
 * @param {Array<{ content: string }>} files - Files whose content will be scanned.
 * @returns {string} A string of backticks: at least three, and one more than the longest run found.
 */
const calculateMarkdownDelimiter = (files) => {
  // Track the maximum while scanning instead of collecting every match from
  // every file into one intermediate array.
  let maxBackticks = 0;
  for (const file of files) {
    for (const run of file.content.match(/`+/g) ?? []) {
      maxBackticks = Math.max(maxBackticks, run.length);
    }
  }
  return '`'.repeat(Math.max(3, maxBackticks + 1));
};

/**
 * Counts the lines of every processed file.
 *
 * Empty content counts as 0 lines. A trailing newline terminates the last
 * line rather than starting a new one, so 'a\n' and 'a' both count as 1.
 *
 * @param {Array<{ path: string, content: string }>} processedFiles
 * @returns {Object<string, number>} Line count keyed by file path.
 */
const calculateFileLineCounts = (processedFiles) => {
  const lineCounts = {};
  for (const file of processedFiles) {
    const content = file.content;
    if (content.length === 0) {
      lineCounts[file.path] = 0;
    } else {
      const newlineCount = (content.match(/\n/g) || []).length;
      lineCounts[file.path] = content.endsWith('\n') ? newlineCount : newlineCount + 1;
    }
  }
  return lineCounts;
};

/**
 * Flattens an output generator context into the object consumed by the
 * templates and by the parsable XML/JSON generators.
 *
 * @param {object} outputGeneratorContext - Context as returned by `buildOutputGeneratorContext`.
 * @returns {object} The render context.
 */
export const createRenderContext = (outputGeneratorContext) => {
  const { config, processedFiles, instruction, gitDiffResult, gitLogResult } = outputGeneratorContext;
  return {
    generationHeader: generateHeader(config, outputGeneratorContext.generationDate),
    summaryPurpose: generateSummaryPurpose(config),
    summaryFileFormat: generateSummaryFileFormat(),
    summaryUsageGuidelines: generateSummaryUsageGuidelines(config, instruction),
    summaryNotes: generateSummaryNotes(config),
    headerText: config.output.headerText,
    instruction,
    treeString: outputGeneratorContext.treeString,
    processedFiles,
    fileLineCounts: calculateFileLineCounts(processedFiles),
    fileSummaryEnabled: config.output.fileSummary,
    directoryStructureEnabled: config.output.directoryStructure,
    filesEnabled: config.output.files,
    escapeFileContent: config.output.parsableStyle,
    markdownCodeBlockDelimiter: calculateMarkdownDelimiter(processedFiles),
    gitDiffEnabled: config.output.git?.includeDiffs,
    gitDiffWorkTree: gitDiffResult?.workTreeDiffContent,
    gitDiffStaged: gitDiffResult?.stagedDiffContent,
    gitLogEnabled: config.output.git?.includeLogs,
    gitLogContent: gitLogResult?.logContent,
    gitLogCommits: gitLogResult?.commits,
  };
};

/**
 * Serialises the render context to XML with `fast-xml-builder`.
 *
 * @param {object} renderContext - Context produced by `createRenderContext`.
 * @returns {Promise<string>} The XML document.
 * @throws {RepomixError} When the builder fails; the original error is attached as `cause`.
 */
const generateParsableXmlOutput = async (renderContext) => {
  // Dynamic import: the builder is only loaded when this output path is taken.
  const FastXmlBuilder = (await import('fast-xml-builder')).default;
  const xmlBuilder = new FastXmlBuilder({ ignoreAttributes: false });

  // Sections that are disabled are set to `undefined` rather than omitted.
  const xmlDocument = {
    repomix: {
      file_summary: renderContext.fileSummaryEnabled
        ? {
            '#text': renderContext.generationHeader,
            purpose: renderContext.summaryPurpose,
            // NOTE(review): this literal looks like a multi-line list whose line breaks were
            // flattened to spaces in this copy — confirm against the upstream formatting.
            file_format: `${renderContext.summaryFileFormat} 5. Repository files, each consisting of: - File path as an attribute - Full contents of the file`,
            usage_guidelines: renderContext.summaryUsageGuidelines,
            notes: renderContext.summaryNotes,
          }
        : undefined,
      user_provided_header: renderContext.headerText,
      directory_structure: renderContext.directoryStructureEnabled ? renderContext.treeString : undefined,
      files: renderContext.filesEnabled
        ? {
            '#text': "This section contains the contents of the repository's files.",
            file: renderContext.processedFiles.map((file) => ({
              '#text': file.content,
              '@_path': file.path,
            })),
          }
        : undefined,
      git_diffs: renderContext.gitDiffEnabled
        ? {
            git_diff_work_tree: renderContext.gitDiffWorkTree,
            git_diff_staged: renderContext.gitDiffStaged,
          }
        : undefined,
      git_logs: renderContext.gitLogEnabled
        ? {
            git_log_commit: renderContext.gitLogCommits?.map((commit) => ({
              date: commit.date,
              message: commit.message,
              files: commit.files.map((file) => ({ '#text': file })),
            })),
          }
        : undefined,
      instruction: renderContext.instruction ? renderContext.instruction : undefined,
    },
  };

  try {
    return xmlBuilder.build(xmlDocument);
  } catch (error) {
    throw new RepomixError(
      `Failed to generate XML output: ${error instanceof Error ? error.message : 'Unknown error'}`,
      toCauseOption(error),
    );
  }
};

/**
 * Serialises the render context to pretty-printed JSON (2-space indent).
 * Sections that are disabled or empty are left out of the document.
 *
 * @param {object} renderContext - Context produced by `createRenderContext`.
 * @returns {Promise<string>} The JSON document.
 * @throws {RepomixError} When `JSON.stringify` fails; the original error is attached as `cause`.
 */
const generateParsableJsonOutput = async (renderContext) => {
  // Spreading a falsy value adds nothing, so each `cond && {...}` contributes
  // its key only when the condition is truthy.
  const jsonDocument = {
    ...(renderContext.fileSummaryEnabled && {
      fileSummary: {
        generationHeader: renderContext.generationHeader,
        purpose: renderContext.summaryPurpose,
        fileFormat: generateSummaryFileFormatJson(),
        usageGuidelines: renderContext.summaryUsageGuidelines,
        notes: renderContext.summaryNotes,
      },
    }),
    ...(renderContext.headerText && {
      userProvidedHeader: renderContext.headerText,
    }),
    ...(renderContext.directoryStructureEnabled && {
      directoryStructure: renderContext.treeString,
    }),
    ...(renderContext.filesEnabled && {
      files: renderContext.processedFiles.reduce((acc, file) => {
        acc[file.path] = file.content;
        return acc;
      }, {}),
    }),
    ...(renderContext.gitDiffEnabled && {
      gitDiffs: {
        workTree: renderContext.gitDiffWorkTree,
        staged: renderContext.gitDiffStaged,
      },
    }),
    ...(renderContext.gitLogEnabled && {
      gitLogs: renderContext.gitLogCommits?.map((commit) => ({
        date: commit.date,
        message: commit.message,
        files: commit.files,
      })),
    }),
    ...(renderContext.instruction && {
      instruction: renderContext.instruction,
    }),
  };

  try {
    return JSON.stringify(jsonDocument, null, 2);
  } catch (error) {
    throw new RepomixError(
      `Failed to generate JSON output: ${error instanceof Error ? error.message : 'Unknown error'}`,
      toCauseOption(error),
    );
  }
};

/**
 * Renders the output through the Handlebars template for `config.output.style`.
 *
 * @param {object} config - Configuration; `config.output.style` selects the template.
 * @param {object} renderContext - Context produced by `createRenderContext`.
 * @param {Array<{ path: string, content: string }>} [processedFiles] - Used only to list the
 *   largest files when rendering fails with a string-length RangeError. Never mutated.
 * @returns {Promise<string>} The trimmed rendered output followed by a single newline.
 * @throws {RepomixError} On a string-length overflow or any other template failure.
 */
const generateHandlebarOutput = async (config, renderContext, processedFiles) => {
  try {
    const compiledTemplate = getCompiledTemplate(config.output.style);
    return `${compiledTemplate(renderContext).trim()}\n`;
  } catch (error) {
    if (error instanceof RangeError && error.message === 'Invalid string length') {
      let largeFilesInfo = '';
      if (processedFiles && processedFiles.length > 0) {
        // Sort a copy: Array.prototype.sort works in place and would otherwise
        // reorder the caller's already-sorted file list.
        const topFiles = [...processedFiles]
          .sort((a, b) => b.content.length - a.content.length)
          .slice(0, 5)
          .map((f) => ` - ${f.path} (${(f.content.length / 1024 / 1024).toFixed(1)} MB)`)
          .join('\n');
        largeFilesInfo = `\n\nLargest files in this repository:\n${topFiles}`;
      }
      // NOTE(review): this message looks like a multi-line list whose line breaks were
      // flattened to spaces in this copy — confirm against the upstream formatting.
      throw new RepomixError(`Output size exceeds JavaScript string limit. The repository contains files that are too large to process. Please try: - Use --ignore to exclude large files (e.g., --ignore "docs/**" or --ignore "*.html") - Use --include to process only specific files - Process smaller portions of the repository at a time${largeFilesInfo}`, { cause: error });
    }
    throw new RepomixError(
      `Failed to compile template: ${error instanceof Error ? error.message : 'Unknown error'}`,
      toCauseOption(error),
    );
  }
};

/**
 * Generates the final output string for the configured style.
 *
 * Sorts the processed files, builds the generator context, derives the render
 * context, then dispatches on `config.output.style`: 'xml' uses the parsable
 * XML builder when `config.output.parsableStyle` is truthy and Handlebars
 * otherwise, 'json' uses the JSON generator, 'markdown' and 'plain' use Handlebars.
 *
 * @param {string[]} rootDirs - Root directories of the packed repository.
 * @param {object} config - Merged configuration.
 * @param {Array<{ path: string, content: string }>} processedFiles
 * @param {string[]} allFilePaths
 * @param {object} [gitDiffResult]
 * @param {object} [gitLogResult]
 * @param {*} [filePathsByRoot] - Forwarded to `buildOutputGeneratorContext`.
 * @param {string[]} [emptyDirPaths] - Forwarded to `buildOutputGeneratorContext`.
 * @param {object} [deps] - Collaborators; defaults to the implementations in this module.
 * @returns {Promise<string>} The generated output.
 * @throws {RepomixError} When the output style is not supported.
 */
export const generateOutput = async (
  rootDirs,
  config,
  processedFiles,
  allFilePaths,
  gitDiffResult = undefined,
  gitLogResult = undefined,
  filePathsByRoot,
  emptyDirPaths,
  deps = {
    buildOutputGeneratorContext,
    generateHandlebarOutput,
    generateParsableXmlOutput,
    generateParsableJsonOutput,
    sortOutputFiles,
  },
) => {
  const sortedProcessedFiles = await deps.sortOutputFiles(processedFiles, config);
  const outputGeneratorContext = await deps.buildOutputGeneratorContext(
    rootDirs,
    config,
    allFilePaths,
    sortedProcessedFiles,
    gitDiffResult,
    gitLogResult,
    filePathsByRoot,
    emptyDirPaths,
  );
  const renderContext = createRenderContext(outputGeneratorContext);

  switch (config.output.style) {
    case 'xml':
      return config.output.parsableStyle
        ? deps.generateParsableXmlOutput(renderContext)
        : deps.generateHandlebarOutput(config, renderContext, sortedProcessedFiles);
    case 'json':
      return deps.generateParsableJsonOutput(renderContext);
    case 'markdown':
    case 'plain':
      return deps.generateHandlebarOutput(config, renderContext, sortedProcessedFiles);
    default:
      throw new RepomixError(`Unsupported output style: ${config.output.style}`);
  }
};

/**
 * Collects everything needed to render the output: the optional instruction
 * file content, the directory tree string, and the inputs passed through.
 *
 * @param {string[]} rootDirs - Root directories to scan when extra tree data is needed.
 * @param {object} config - Merged configuration.
 * @param {string[]} allFilePaths - Paths of the files included in the output.
 * @param {Array<{ path: string, content: string }>} processedFiles
 * @param {object} [gitDiffResult]
 * @param {object} [gitLogResult]
 * @param {*} [filePathsByRoot] - When truthy, the tree is built with `generateTreeStringWithRoots`.
 * @param {string[]} [emptyDirPaths] - Already-known empty directories; avoids a second search.
 * @param {object} [deps] - File-search collaborators; defaults to the imported implementations.
 * @returns {Promise<object>} `{ generationDate, treeString, processedFiles, config, instruction, gitDiffResult, gitLogResult }`.
 * @throws {RepomixError} When the instruction file cannot be read or a directory scan fails.
 */
export const buildOutputGeneratorContext = async (
  rootDirs,
  config,
  allFilePaths,
  processedFiles,
  gitDiffResult = undefined,
  gitLogResult = undefined,
  filePathsByRoot,
  emptyDirPaths,
  deps = {
    listDirectories,
    listFiles,
    searchFiles,
  },
) => {
  let repositoryInstruction = '';
  if (config.output.instructionFilePath) {
    const instructionPath = path.resolve(config.cwd, config.output.instructionFilePath);
    try {
      repositoryInstruction = await fs.readFile(instructionPath, 'utf-8');
    } catch (error) {
      // Keep the message, but attach the underlying error so a permission
      // problem can be told apart from a missing file.
      throw new RepomixError(`Instruction file not found at ${instructionPath}`, toCauseOption(error));
    }
  }

  // The full tree is only relevant when include patterns narrowed the file set.
  const shouldUseFullTree =
    config.output.directoryStructure === true &&
    !!config.output.includeFullDirectoryStructure &&
    (config.include?.length ?? 0) > 0;

  let directoryPathsForTree = [];
  let filePathsForTree = allFilePaths;

  if (shouldUseFullTree) {
    try {
      const [allDirectoriesByRoot, allFilesByRoot] = await Promise.all([
        Promise.all(rootDirs.map((rootDir) => deps.listDirectories(rootDir, config))),
        Promise.all(rootDirs.map((rootDir) => deps.listFiles(rootDir, config))),
      ]);
      const allDirectories = Array.from(new Set(allDirectoriesByRoot.flat())).sort();
      const allRepoFiles = Array.from(new Set(allFilesByRoot.flat()));
      const includedSet = new Set(allFilePaths);
      const additionalFiles = allRepoFiles.filter((p) => !includedSet.has(p));
      directoryPathsForTree = allDirectories;
      // Included files first, then the remaining repository files.
      filePathsForTree = allFilePaths.concat(additionalFiles);
    } catch (error) {
      throw new RepomixError(
        `Failed to build full directory structure: ${error instanceof Error ? error.message : String(error)}`,
        toCauseOption(error),
      );
    }
  } else if (config.output.directoryStructure && config.output.includeEmptyDirectories) {
    if (emptyDirPaths) {
      directoryPathsForTree = emptyDirPaths;
    } else {
      try {
        const results = await Promise.all(rootDirs.map((rootDir) => deps.searchFiles(rootDir, config)));
        const merged = results.flatMap((r) => r.emptyDirPaths);
        directoryPathsForTree = [...new Set(merged)].sort();
      } catch (error) {
        throw new RepomixError(
          `Failed to search for empty directories: ${error instanceof Error ? error.message : String(error)}`,
          toCauseOption(error),
        );
      }
    }
  }

  // NOTE(review): when filePathsByRoot is provided, filePathsForTree (including the extra
  // files gathered for the full tree) is not passed to the tree generator — confirm intended.
  let treeString;
  if (filePathsByRoot) {
    treeString = generateTreeStringWithRoots(filePathsByRoot, directoryPathsForTree);
  } else {
    treeString = generateTreeString(filePathsForTree, directoryPathsForTree);
  }

  return {
    generationDate: new Date().toISOString(),
    treeString,
    processedFiles,
    config,
    instruction: repositoryInstruction,
    gitDiffResult,
    gitLogResult,
  };
};