UNPKG

repomix

Version:

A tool to pack repository contents into a single file for AI consumption

130 lines (129 loc) 6.33 kB
import path from 'node:path';
import { logger } from '../shared/logger.js';
import { logMemoryUsage, withMemoryLogging } from '../shared/memoryUtils.js';
import { collectFiles } from './file/fileCollect.js';
import { sortPaths } from './file/filePathSort.js';
import { processFiles } from './file/fileProcess.js';
import { searchFiles } from './file/fileSearch.js';
import { getGitDiffs } from './git/gitDiffHandle.js';
import { getGitLogs } from './git/gitLogHandle.js';
import { calculateMetrics, createMetricsTaskRunner } from './metrics/calculateMetrics.js';
import { prefetchSortData, sortOutputFiles } from './output/outputSort.js';
import { produceOutput } from './packager/produceOutput.js';
import { validateFileSafety } from './security/validateFileSafety.js';

/**
 * Default collaborators used by `pack`. Any entry can be replaced per call
 * through the `overrideDeps` argument, which is merged on top of this object.
 */
const defaultDeps = {
  searchFiles,
  collectFiles,
  processFiles,
  validateFileSafety,
  produceOutput,
  calculateMetrics,
  createMetricsTaskRunner,
  sortPaths,
  sortOutputFiles,
  prefetchSortData,
  getGitDiffs,
  getGitLogs,
  // Resolved through a dynamic import, so './skill/packSkill.js' is only
  // loaded when this entry is actually invoked.
  packSkill: async (params) => {
    const { packSkill: runPackSkill } = await import('./skill/packSkill.js');
    return runPackSkill(params);
  },
};

/**
 * Runs the packing pipeline: search -> sort -> collect (alongside git diffs
 * and logs) -> security check + processing -> output sort -> either skill
 * packaging or output generation together with metrics.
 *
 * @param {Array} rootDirs - Root directories to pack; each is passed to
 *   `deps.searchFiles` and `deps.collectFiles` (presumably path strings, since
 *   `path.basename` is applied to them — confirm against callers).
 * @param {object} config - Configuration object. This function itself reads
 *   `config.output.includeEmptyDirectories`, `config.tokenCount.encoding` and
 *   `config.skillGenerate`; the whole object is also forwarded to the deps.
 * @param {Function} [progressCallback] - Called with a status message at each
 *   stage and forwarded to the deps. Defaults to a no-op.
 * @param {object} [overrideDeps] - Replacements for entries of `defaultDeps`.
 * @param {*} [explicitFiles] - Forwarded unchanged to `deps.searchFiles`.
 * @param {object} [options] - When `options.skillDir` is truthy and
 *   `config.skillGenerate` is not `undefined`, the result of `deps.packSkill`
 *   is returned instead of the regular pack result.
 * @returns {Promise<object>} Either the `deps.packSkill` result, or the
 *   calculated metrics merged with `outputFiles` (only when truthy), the
 *   suspicious file/diff/log results, `processedFiles`, `safeFilePaths` and
 *   `skippedFiles`.
 */
export const pack = async (rootDirs, config, progressCallback = () => { }, overrideDeps = {}, explicitFiles, options = {}) => {
  const deps = {
    ...defaultDeps,
    ...overrideDeps,
  };
  logMemoryUsage('Pack - Start');

  // Started early so it overlaps with the rest of the pipeline; it is awaited
  // right before the output files are sorted. A failure is only trace-logged.
  const sortDataPromise = deps.prefetchSortData(config).catch((error) => {
    logger.trace('Failed to prefetch sort data:', error);
  });

  progressCallback('Searching for files...');
  const searchOneRoot = async (rootDir) => {
    const { filePaths, emptyDirPaths } = await deps.searchFiles(rootDir, config, explicitFiles);
    return { rootDir, filePaths, emptyDirPaths };
  };
  const searchResultsByDir = await withMemoryLogging(
    'Search Files',
    async () => Promise.all(rootDirs.map((rootDir) => searchOneRoot(rootDir))),
  );

  // Only materialised when the config asks for empty directories; stays
  // undefined otherwise.
  let emptyDirPaths;
  if (config.output.includeEmptyDirectories) {
    const uniqueEmptyDirs = new Set(searchResultsByDir.flatMap((entry) => entry.emptyDirPaths));
    emptyDirPaths = Array.from(uniqueEmptyDirs).sort();
  }

  progressCallback('Sorting files...');
  const allFilePaths = searchResultsByDir.flatMap((entry) => entry.filePaths);
  const sortedFilePaths = deps.sortPaths(allFilePaths);

  // Per-root membership sets, used to split the globally sorted list back into
  // one sorted list per root directory.
  const filePathSetByDir = new Map();
  for (const { rootDir, filePaths } of searchResultsByDir) {
    filePathSetByDir.set(rootDir, new Set(filePaths));
  }
  const sortedFilePathsByDir = rootDirs.map((rootDir) => {
    const ownPaths = filePathSetByDir.get(rootDir);
    return {
      rootDir,
      filePaths: sortedFilePaths.filter((filePath) => ownPaths?.has(filePath) ?? false),
    };
  });

  const {
    taskRunner: metricsTaskRunner,
    warmupPromise: metricsWarmupPromise,
  } = deps.createMetricsTaskRunner(allFilePaths.length, config.tokenCount.encoding);

  try {
    progressCallback('Collecting files...');
    // File collection and both git queries are started together.
    const collectPromise = withMemoryLogging('Collect Files', async () => {
      const perRootResults = await Promise.all(
        sortedFilePathsByDir.map(({ rootDir, filePaths }) => deps.collectFiles(filePaths, rootDir, config, progressCallback)),
      );
      return perRootResults;
    });
    const gitDiffPromise = deps.getGitDiffs(rootDirs, config);
    const gitLogPromise = deps.getGitLogs(rootDirs, config);
    const [collectResults, gitDiffResult, gitLogResult] = await Promise.all([
      collectPromise,
      gitDiffPromise,
      gitLogPromise,
    ]);

    const rawFiles = collectResults.flatMap((collected) => collected.rawFiles);
    const allSkippedFiles = collectResults.flatMap((collected) => collected.skippedFiles);

    // The security check and file processing both work from the same raw
    // files and are run concurrently.
    const securityPromise = withMemoryLogging(
      'Security Check',
      () => deps.validateFileSafety(rawFiles, progressCallback, config, gitDiffResult, gitLogResult),
    );
    const processingPromise = withMemoryLogging('Process Files', () => {
      progressCallback('Processing files...');
      return deps.processFiles(rawFiles, config, progressCallback);
    });
    const [validationResult, allProcessedFiles] = await Promise.all([securityPromise, processingPromise]);

    const {
      safeFilePaths,
      suspiciousFilesResults,
      suspiciousGitDiffResults,
      suspiciousGitLogResults,
    } = validationResult;

    // Drop processed files whose path was flagged by the security check.
    const suspiciousPathSet = new Set(suspiciousFilesResults.map((flagged) => flagged.filePath));
    let filteredProcessedFiles = allProcessedFiles;
    if (suspiciousPathSet.size > 0) {
      filteredProcessedFiles = allProcessedFiles.filter((file) => !suspiciousPathSet.has(file.path));
    }

    await sortDataPromise;
    const processedFiles = await deps.sortOutputFiles(filteredProcessedFiles, config);

    progressCallback('Generating output...');

    // Skill generation replaces the regular output + metrics stage entirely.
    if (config.skillGenerate !== undefined && options.skillDir) {
      const skillResult = await deps.packSkill({
        rootDirs,
        config,
        options,
        processedFiles,
        allFilePaths,
        gitDiffResult,
        gitLogResult,
        suspiciousFilesResults,
        suspiciousGitDiffResults,
        suspiciousGitLogResults,
        safeFilePaths,
        skippedFiles: allSkippedFiles,
        progressCallback,
      });
      logMemoryUsage('Pack - End');
      return skillResult;
    }

    // Label each root by its base name, falling back to the raw value when
    // the base name is empty.
    const filePathsByRoot = sortedFilePathsByDir.map(({ rootDir, filePaths }) => ({
      rootLabel: path.basename(rootDir) || rootDir,
      files: filePaths,
    }));

    // A rejected warmup surfaces here, before output generation starts.
    await metricsWarmupPromise;

    // Metrics calculation is handed a promise for the output, so it can be
    // started without first awaiting output generation.
    const outputPromise = deps.produceOutput(
      rootDirs,
      config,
      processedFiles,
      allFilePaths,
      gitDiffResult,
      gitLogResult,
      progressCallback,
      filePathsByRoot,
      emptyDirPaths,
    );
    const outputForMetricsPromise = outputPromise.then((produced) => produced.outputForMetrics);
    const metricsPromise = withMemoryLogging(
      'Calculate Metrics',
      () => deps.calculateMetrics(processedFiles, outputForMetricsPromise, progressCallback, config, gitDiffResult, gitLogResult, {
        taskRunner: metricsTaskRunner,
      }),
    );
    const [{ outputFiles }, metrics] = await Promise.all([outputPromise, metricsPromise]);

    const packResult = {
      ...metrics,
      // `outputFiles` is only included when it is truthy.
      ...(outputFiles && { outputFiles }),
      suspiciousFilesResults,
      suspiciousGitDiffResults,
      suspiciousGitLogResults,
      processedFiles,
      safeFilePaths,
      skippedFiles: allSkippedFiles,
    };
    logMemoryUsage('Pack - End');
    return packResult;
  } finally {
    // Let the warmup settle (ignoring a rejection) before the task runner is
    // cleaned up; this runs on success, on the skill path, and on failure.
    await metricsWarmupPromise.catch(() => { });
    await metricsTaskRunner.cleanup();
  }
};