repomix
Version:
A tool to pack repository contents to single file for AI consumption
130 lines (129 loc) • 6.33 kB
JavaScript
import path from 'node:path';
import { logger } from '../shared/logger.js';
import { logMemoryUsage, withMemoryLogging } from '../shared/memoryUtils.js';
import { collectFiles } from './file/fileCollect.js';
import { sortPaths } from './file/filePathSort.js';
import { processFiles } from './file/fileProcess.js';
import { searchFiles } from './file/fileSearch.js';
import { getGitDiffs } from './git/gitDiffHandle.js';
import { getGitLogs } from './git/gitLogHandle.js';
import { calculateMetrics, createMetricsTaskRunner } from './metrics/calculateMetrics.js';
import { prefetchSortData, sortOutputFiles } from './output/outputSort.js';
import { produceOutput } from './packager/produceOutput.js';
import { validateFileSafety } from './security/validateFileSafety.js';
/**
 * Default implementations of the collaborators used by `pack`.
 *
 * `pack` spreads this object first and then its `overrideDeps` argument on
 * top, so any entry here can be replaced per call (e.g. by tests).
 */
const defaultDeps = {
  // File discovery, reading and processing.
  searchFiles,
  collectFiles,
  processFiles,
  validateFileSafety,

  // Output generation and metrics.
  produceOutput,
  calculateMetrics,
  createMetricsTaskRunner,

  // Ordering helpers.
  sortPaths,
  sortOutputFiles,
  prefetchSortData,

  // Git integration.
  getGitDiffs,
  getGitLogs,

  /**
   * Runs the skill packer. The module is pulled in with a dynamic import, so
   * it is only loaded when this function is actually invoked.
   *
   * @param {object} params - Forwarded unchanged to the module's `packSkill`.
   * @returns {Promise<*>} Whatever the module's `packSkill` returns.
   */
  async packSkill(params) {
    const { packSkill: runPackSkill } = await import('./skill/packSkill.js');
    return runPackSkill(params);
  },
};
/**
 * Runs the full packing pipeline: search -> sort -> collect (in parallel with
 * git diff/log retrieval) -> security check + processing -> output sort ->
 * output generation + metrics.
 *
 * @param {string[]} rootDirs - Root directories to pack.
 * @param {object} config - Configuration forwarded to every collaborator. This
 *   function itself reads `config.output.includeEmptyDirectories`,
 *   `config.tokenCount.encoding` and `config.skillGenerate`.
 * @param {(message: string) => void} [progressCallback] - Receives progress
 *   messages; defaults to a no-op.
 * @param {object} [overrideDeps] - Replacements for entries of `defaultDeps`.
 * @param {*} [explicitFiles] - Forwarded unchanged to `deps.searchFiles`.
 * @param {object} [options] - Extra options; `options.skillDir` together with a
 *   defined `config.skillGenerate` switches to the skill-packing path.
 * @returns {Promise<object>} In skill mode, the value resolved by
 *   `deps.packSkill`. Otherwise the metrics object spread together with
 *   `outputFiles` (only when truthy), the suspicious file/diff/log results,
 *   `processedFiles`, `safeFilePaths` and `skippedFiles`.
 * @throws Rethrows any error raised by a collaborator; the metrics task
 *   runner's `cleanup()` is awaited in all cases.
 */
export const pack = async (rootDirs, config, progressCallback = () => { }, overrideDeps = {}, explicitFiles, options = {}) => {
  const deps = {
    ...defaultDeps,
    ...overrideDeps,
  };
  logMemoryUsage('Pack - Start');

  // Started early so it overlaps with the phases below. A failure is only
  // logged at trace level, so awaiting this promise later never throws.
  const sortDataPromise = deps.prefetchSortData(config).catch((error) => {
    logger.trace('Failed to prefetch sort data:', error);
  });

  progressCallback('Searching for files...');
  const searchResultsByDir = await withMemoryLogging('Search Files', async () => Promise.all(rootDirs.map(async (rootDir) => {
    const result = await deps.searchFiles(rootDir, config, explicitFiles);
    return { rootDir, filePaths: result.filePaths, emptyDirPaths: result.emptyDirPaths };
  })));

  // De-duplicated, sorted list of empty directories across all roots (only when enabled).
  const emptyDirPaths = config.output.includeEmptyDirectories
    ? [...new Set(searchResultsByDir.flatMap((r) => r.emptyDirPaths))].sort()
    : undefined;

  progressCallback('Sorting files...');
  const allFilePaths = searchResultsByDir.flatMap(({ filePaths }) => filePaths);
  const sortedFilePaths = deps.sortPaths(allFilePaths);
  // Sort once globally, then split back per root using Set membership so each
  // root keeps the global ordering.
  const filePathSetByDir = new Map(searchResultsByDir.map(({ rootDir, filePaths }) => [rootDir, new Set(filePaths)]));
  const sortedFilePathsByDir = rootDirs.map((rootDir) => ({
    rootDir,
    filePaths: sortedFilePaths.filter((filePath) => filePathSetByDir.get(rootDir)?.has(filePath) ?? false),
  }));

  const { taskRunner: metricsTaskRunner, warmupPromise: metricsWarmupPromise } = deps.createMetricsTaskRunner(allFilePaths.length, config.tokenCount.encoding);
  // The warm-up runs in the background and is not awaited until just before
  // output generation. Register a no-op observer right away so that a warm-up
  // failure during the long phases in between is not reported as an unhandled
  // rejection (which terminates Node by default). The failure is NOT hidden:
  // the `await metricsWarmupPromise` below still rethrows it.
  void metricsWarmupPromise.catch(() => { });

  try {
    progressCallback('Collecting files...');
    const [collectResults, gitDiffResult, gitLogResult] = await Promise.all([
      withMemoryLogging('Collect Files', async () => await Promise.all(sortedFilePathsByDir.map(({ rootDir, filePaths }) => deps.collectFiles(filePaths, rootDir, config, progressCallback)))),
      deps.getGitDiffs(rootDirs, config),
      deps.getGitLogs(rootDirs, config),
    ]);
    const rawFiles = collectResults.flatMap((curr) => curr.rawFiles);
    const allSkippedFiles = collectResults.flatMap((curr) => curr.skippedFiles);

    // The security check and file processing both start from the raw files and run concurrently.
    const [validationResult, allProcessedFiles] = await Promise.all([
      withMemoryLogging('Security Check', () => deps.validateFileSafety(rawFiles, progressCallback, config, gitDiffResult, gitLogResult)),
      withMemoryLogging('Process Files', () => {
        progressCallback('Processing files...');
        return deps.processFiles(rawFiles, config, progressCallback);
      }),
    ]);
    const { safeFilePaths, suspiciousFilesResults, suspiciousGitDiffResults, suspiciousGitLogResults } = validationResult;

    // Processing ran on every raw file, so drop the ones flagged as suspicious afterwards.
    const suspiciousPathSet = new Set(suspiciousFilesResults.map((r) => r.filePath));
    const filteredProcessedFiles = suspiciousPathSet.size > 0 ? allProcessedFiles.filter((f) => !suspiciousPathSet.has(f.path)) : allProcessedFiles;

    await sortDataPromise;
    const processedFiles = await deps.sortOutputFiles(filteredProcessedFiles, config);

    progressCallback('Generating output...');
    if (config.skillGenerate !== undefined && options.skillDir) {
      // Skill mode: delegate output generation entirely to packSkill.
      const result = await deps.packSkill({
        rootDirs,
        config,
        options,
        processedFiles,
        allFilePaths,
        gitDiffResult,
        gitLogResult,
        suspiciousFilesResults,
        suspiciousGitDiffResults,
        suspiciousGitLogResults,
        safeFilePaths,
        skippedFiles: allSkippedFiles,
        progressCallback,
      });
      logMemoryUsage('Pack - End');
      return result;
    }

    const filePathsByRoot = sortedFilePathsByDir.map(({ rootDir, filePaths }) => ({
      // basename is '' for inputs such as '/', so fall back to the raw value.
      rootLabel: path.basename(rootDir) || rootDir,
      files: filePaths,
    }));

    await metricsWarmupPromise;

    const outputPromise = deps.produceOutput(rootDirs, config, processedFiles, allFilePaths, gitDiffResult, gitLogResult, progressCallback, filePathsByRoot, emptyDirPaths);
    const outputForMetricsPromise = outputPromise.then((r) => r.outputForMetrics);
    // This derived promise is only handed to calculateMetrics. If metrics
    // calculation fails before subscribing to it and produceOutput rejects
    // later, the derived rejection would otherwise be unhandled. The original
    // `outputPromise` is still observed by Promise.all below.
    void outputForMetricsPromise.catch(() => { });

    const [{ outputFiles }, metrics] = await Promise.all([
      outputPromise,
      withMemoryLogging('Calculate Metrics', () => deps.calculateMetrics(processedFiles, outputForMetricsPromise, progressCallback, config, gitDiffResult, gitLogResult, {
        taskRunner: metricsTaskRunner,
      })),
    ]);

    const result = {
      ...metrics,
      ...(outputFiles && { outputFiles }),
      suspiciousFilesResults,
      suspiciousGitDiffResults,
      suspiciousGitLogResults,
      processedFiles,
      safeFilePaths,
      skippedFiles: allSkippedFiles,
    };
    logMemoryUsage('Pack - End');
    return result;
  }
  finally {
    // Let the warm-up settle (ignoring its outcome here) before cleaning up the task runner.
    await metricsWarmupPromise.catch(() => { });
    await metricsTaskRunner.cleanup();
  }
};