UNPKG

repomix

Version:

A tool to pack repository contents into a single file for AI consumption

128 lines (127 loc) 5.76 kB
import path from 'node:path';
import pc from 'picocolors';
import { RepomixError } from '../../shared/errorHandle.js';

/**
 * Returns the first path segment of a relative file path.
 *
 * Windows separators are converted to POSIX separators before splitting.
 * When the first segment is empty (for example an empty string or a path
 * that starts with a separator), the whole normalized path is returned.
 *
 * @param {string} relativeFilePath - Path to inspect.
 * @returns {string} The first segment, or the normalized path as a fallback.
 */
export const getRootEntry = (relativeFilePath) => {
  const normalized = relativeFilePath.replaceAll(path.win32.sep, path.posix.sep);
  const [first] = normalized.split('/');
  return first || normalized;
};

/**
 * Buckets file paths and processed files by their root entry.
 *
 * Every path in `allFilePaths` is placed in the group for its root entry.
 * Each processed file is then attached to the group matching its `path`;
 * if no such group exists yet, one is created that lists the processed
 * file's path as its only file path.
 *
 * @param {Array<{path: string}>} processedFiles - Processed files; only `path` is read here.
 * @param {string[]} allFilePaths - All file paths to group.
 * @returns {Array<{rootEntry: string, processedFiles: Array, allFilePaths: string[]}>}
 *   Groups sorted by `rootEntry` using `localeCompare`.
 */
export const buildOutputSplitGroups = (processedFiles, allFilePaths) => {
  const groupsByRootEntry = new Map();

  for (const filePath of allFilePaths) {
    const rootEntry = getRootEntry(filePath);
    const existing = groupsByRootEntry.get(rootEntry);
    if (existing) {
      existing.allFilePaths.push(filePath);
    } else {
      groupsByRootEntry.set(rootEntry, { rootEntry, processedFiles: [], allFilePaths: [filePath] });
    }
  }

  for (const processedFile of processedFiles) {
    const rootEntry = getRootEntry(processedFile.path);
    const existing = groupsByRootEntry.get(rootEntry);
    if (existing) {
      existing.processedFiles.push(processedFile);
    } else {
      groupsByRootEntry.set(rootEntry, {
        rootEntry,
        processedFiles: [processedFile],
        allFilePaths: [processedFile.path],
      });
    }
  }

  return [...groupsByRootEntry.values()].sort((a, b) => a.rootEntry.localeCompare(b.rootEntry));
};

/**
 * Builds the output path for one part by inserting the part index before the
 * file extension (`out.xml` -> `out.1.xml`), or appending it when the path has
 * no extension (`out` -> `out.1`).
 *
 * @param {string} baseFilePath - Configured output file path.
 * @param {number} partIndex - Index of the part.
 * @returns {string} Path for the given part.
 */
export const buildSplitOutputFilePath = (baseFilePath, partIndex) => {
  const ext = path.extname(baseFilePath);
  if (!ext) {
    return `${baseFilePath}.${partIndex}`;
  }
  const baseWithoutExt = baseFilePath.slice(0, -ext.length);
  return `${baseWithoutExt}.${partIndex}${ext}`;
};

/** Size of `content` in bytes when encoded as UTF-8. */
const getUtf8ByteLength = (content) => Buffer.byteLength(content, 'utf8');

/**
 * Returns the config to use for a given part. Part 1 uses `baseConfig`
 * unchanged; every other part gets a shallow copy whose `output.git` has
 * `includeDiffs` and `includeLogs` turned off.
 */
const makeChunkConfig = (baseConfig, partIndex) => {
  if (partIndex === 1) {
    return baseConfig;
  }
  const git = {
    ...baseConfig.output.git,
    includeDiffs: false,
    includeLogs: false,
  };
  return {
    ...baseConfig,
    output: {
      ...baseConfig.output,
      git,
    },
  };
};

/**
 * Renders the given groups as a single part by calling `generateOutput` with
 * the groups' combined processed files and file paths. The git diff and git
 * log results are forwarded only for part 1; other parts receive `undefined`.
 */
const renderGroups = async (
  groupsToRender,
  partIndex,
  rootDirs,
  baseConfig,
  gitDiffResult,
  gitLogResult,
  filePathsByRoot,
  emptyDirPaths,
  generateOutput,
) => {
  const chunkProcessedFiles = groupsToRender.flatMap((g) => g.processedFiles);
  const chunkAllFilePaths = groupsToRender.flatMap((g) => g.allFilePaths);
  const chunkConfig = makeChunkConfig(baseConfig, partIndex);
  const isFirstPart = partIndex === 1;
  return await generateOutput(
    rootDirs,
    chunkConfig,
    chunkProcessedFiles,
    chunkAllFilePaths,
    isFirstPart ? gitDiffResult : undefined,
    isFirstPart ? gitLogResult : undefined,
    filePathsByRoot,
    emptyDirPaths,
  );
};

/** Formats the single-line progress message shown while a root entry is evaluated for a part. */
const formatEvaluatingMessage = (partIndex, rootEntry) =>
  `Generating output... (part ${partIndex}) ${pc.dim(`evaluating ${rootEntry}`)}`;

/** Builds the error raised when one root entry alone renders larger than the per-part limit. */
const createOversizedEntryError = (rootEntry, partBytes, maxBytesPerPart) =>
  new RepomixError(
    `Cannot split output: root entry '${rootEntry}' exceeds max size. ` +
      `Part size ${partBytes.toLocaleString()} bytes > limit ${maxBytesPerPart.toLocaleString()} bytes.`,
  );

/** Builds the descriptor object for a finished part. */
const createPart = (index, baseFilePath, content, byteLength, groups) => ({
  index,
  filePath: buildSplitOutputFilePath(baseFilePath, index),
  content,
  byteLength,
  groups,
});

/**
 * Splits the output into parts whose rendered UTF-8 size does not exceed
 * `maxBytesPerPart`.
 *
 * Root-entry groups are taken in sorted order and added to the current part
 * one at a time; after each addition the candidate part is rendered in full
 * and measured. When the candidate no longer fits, the current part is closed
 * and the group is rendered again on its own as the start of the next part
 * (rendering depends on the part index, so the size must be re-measured).
 *
 * @param {object} params
 * @param {*} params.rootDirs - Forwarded to `deps.generateOutput`.
 * @param {object} params.baseConfig - Base config; `output.filePath` and `output.git` are read.
 * @param {Array<{path: string}>} params.processedFiles - Processed files to distribute.
 * @param {string[]} params.allFilePaths - All file paths to distribute.
 * @param {number} params.maxBytesPerPart - Positive safe integer byte limit per part.
 * @param {*} params.gitDiffResult - Forwarded to `deps.generateOutput` for part 1 only.
 * @param {*} params.gitLogResult - Forwarded to `deps.generateOutput` for part 1 only.
 * @param {(message: string) => void} params.progressCallback - Receives progress messages.
 * @param {*} params.filePathsByRoot - Forwarded to `deps.generateOutput`.
 * @param {*} params.emptyDirPaths - Forwarded to `deps.generateOutput`.
 * @param {{generateOutput: Function}} params.deps - Supplies the output generator.
 * @returns {Promise<Array<{index: number, filePath: string, content: string, byteLength: number, groups: Array}>>}
 *   The parts in order; empty when there is nothing to output.
 * @throws {RepomixError} If `maxBytesPerPart` is not a positive safe integer, or
 *   if a single root entry renders larger than the limit on its own.
 */
export const generateSplitOutputParts = async ({
  rootDirs,
  baseConfig,
  processedFiles,
  allFilePaths,
  maxBytesPerPart,
  gitDiffResult,
  gitLogResult,
  progressCallback,
  filePathsByRoot,
  emptyDirPaths,
  deps,
}) => {
  if (!Number.isSafeInteger(maxBytesPerPart) || maxBytesPerPart <= 0) {
    throw new RepomixError(`Invalid maxBytesPerPart: ${maxBytesPerPart}`);
  }

  const groups = buildOutputSplitGroups(processedFiles, allFilePaths);
  if (groups.length === 0) {
    return [];
  }

  const render = (groupsToRender, partIndex) =>
    renderGroups(
      groupsToRender,
      partIndex,
      rootDirs,
      baseConfig,
      gitDiffResult,
      gitLogResult,
      filePathsByRoot,
      emptyDirPaths,
      deps.generateOutput,
    );

  const parts = [];
  let currentGroups = [];
  let currentContent = '';
  let currentBytes = 0;

  for (const group of groups) {
    const partIndex = parts.length + 1;
    const nextGroups = [...currentGroups, group];

    progressCallback(formatEvaluatingMessage(partIndex, group.rootEntry));
    const nextContent = await render(nextGroups, partIndex);
    const nextBytes = getUtf8ByteLength(nextContent);

    if (nextBytes <= maxBytesPerPart) {
      currentGroups = nextGroups;
      currentContent = nextContent;
      currentBytes = nextBytes;
      continue;
    }

    // The group was rendered alone and still does not fit: it cannot be split further.
    if (currentGroups.length === 0) {
      throw createOversizedEntryError(group.rootEntry, nextBytes, maxBytesPerPart);
    }

    // Close the current part without the group that overflowed it.
    parts.push(createPart(partIndex, baseConfig.output.filePath, currentContent, currentBytes, currentGroups));

    // Start the next part with the overflowing group, measured under the new part index.
    const newPartIndex = parts.length + 1;
    progressCallback(formatEvaluatingMessage(newPartIndex, group.rootEntry));
    const singleGroupContent = await render([group], newPartIndex);
    const singleGroupBytes = getUtf8ByteLength(singleGroupContent);
    if (singleGroupBytes > maxBytesPerPart) {
      throw createOversizedEntryError(group.rootEntry, singleGroupBytes, maxBytesPerPart);
    }

    currentGroups = [group];
    currentContent = singleGroupContent;
    currentBytes = singleGroupBytes;
  }

  if (currentGroups.length > 0) {
    const finalIndex = parts.length + 1;
    parts.push(createPart(finalIndex, baseConfig.output.filePath, currentContent, currentBytes, currentGroups));
  }

  return parts;
};