UNPKG

repomix

Version:

A tool to pack repository contents into a single file for AI consumption

124 lines (123 loc) 5.87 kB
import { logger } from '../../shared/logger.js';
import { getWorkerThreadCount, initTaskRunner } from '../../shared/processConcurrency.js';
import { buildSplitOutputFilePath } from '../output/outputSplit.js';
import { calculateFileMetrics } from './calculateFileMetrics.js';
import { calculateGitDiffMetrics } from './calculateGitDiffMetrics.js';
import { calculateGitLogMetrics } from './calculateGitLogMetrics.js';
import { calculateOutputMetrics } from './calculateOutputMetrics.js';
import { runTokenCount } from './metricsWorkerRunner.js';

/**
 * Starts a task runner configured for the 'calculateMetrics' worker type on
 * the 'worker_threads' runtime.
 *
 * @param numOfTasks - Task count forwarded to `initTaskRunner`.
 * @returns Whatever `initTaskRunner` returns.
 */
const spawnMetricsTaskRunner = (numOfTasks) =>
  initTaskRunner({
    numOfTasks,
    workerType: 'calculateMetrics',
    runtime: 'worker_threads',
  });

/**
 * Creates a metrics task runner and immediately submits one empty-content
 * token-count task per `maxThreads` (as reported by `getWorkerThreadCount`).
 *
 * @param numOfTasks - Task count used to size the runner and the warm-up batch.
 * @param encoding - Encoding passed along with every warm-up task.
 * @returns {{ taskRunner: *, warmupPromise: Promise<Array> }} The runner plus a
 *   promise that settles once all warm-up tasks have settled.
 */
export const createMetricsTaskRunner = (numOfTasks, encoding) => {
  const taskRunner = spawnMetricsTaskRunner(numOfTasks);
  const threadCount = getWorkerThreadCount(numOfTasks).maxThreads;

  // A failing warm-up task is deliberately tolerated: its rejection is
  // converted into a resolved 0 so `warmupPromise` itself never rejects.
  const warmOneWorker = () => taskRunner.run({ content: '', encoding }).catch(() => 0);
  const warmupRuns = Array.from({ length: threadCount }, warmOneWorker);

  return { taskRunner, warmupPromise: Promise.all(warmupRuns) };
};

// Default collaborators for `calculateMetrics`; callers may override any of
// them (including supplying their own `taskRunner`) via `overrideDeps`.
const defaultDeps = {
  calculateFileMetrics,
  calculateOutputMetrics,
  calculateGitDiffMetrics,
  calculateGitLogMetrics,
  taskRunner: undefined,
};

/**
 * Returns `output` with each file's content removed, i.e. only the text that
 * surrounds the file contents.
 *
 * Files are located left to right with `indexOf`, each search starting where
 * the previous match ended, so the files must appear in `output` in the given
 * order. Files with empty content are skipped.
 *
 * @param output - The generated output text.
 * @param processedFilesInOutputOrder - Files (each with a `content` string) in
 *   the order they occur inside `output`.
 * @returns The concatenated non-file text, or `null` when some file content
 *   cannot be found at or after the current scan position.
 */
export const extractOutputWrapper = (output, processedFilesInOutputOrder) => {
  let wrapper = '';
  let scanFrom = 0;

  for (const { content } of processedFilesInOutputOrder) {
    if (content.length === 0) {
      continue;
    }

    const start = output.indexOf(content, scanFrom);
    if (start === -1) {
      return null;
    }

    wrapper += output.slice(scanFrom, start);
    scanFrom = start + content.length;
  }

  // Whatever follows the last matched file belongs to the wrapper as well.
  return wrapper + output.slice(scanFrom);
};

// Output styles for which the fast output-token path is allowed.
const FAST_PATH_STYLES = ['xml', 'markdown', 'plain'];

/**
 * Decides whether output tokens may be derived from per-file token counts plus
 * the wrapper text instead of tokenizing the full output.
 *
 * @param config - Configuration object; only `config.output` is read.
 * @returns {boolean} `true` only when `splitOutput` is undefined,
 *   `parsableStyle` is falsy and `style` is 'xml', 'markdown' or 'plain'.
 */
export const canUseFastOutputTokenPath = (config) => {
  const { splitOutput, parsableStyle, style } = config.output;
  return splitOutput === undefined && !parsableStyle && FAST_PATH_STYLES.includes(style);
};

/**
 * Computes file, output, git-diff and git-log metrics for a packing run.
 *
 * File and git metrics are started right away and run while `outputPromise`
 * is still pending. Once the output is available, its token count is obtained
 * either through the fast path (sum of per-file tokens + tokens of the
 * wrapper text) or by tokenizing every output part.
 *
 * @param processedFiles - Files with `path` and `content`.
 * @param outputPromise - Resolves to the output string, or an array of output
 *   parts.
 * @param progressCallback - Called with a progress message; also forwarded to
 *   `calculateFileMetrics`.
 * @param config - Configuration; `tokenCount.encoding` and `output` are read
 *   here, and the whole object is forwarded to the git metric calculators.
 * @param gitDiffResult - Forwarded to `calculateGitDiffMetrics`.
 * @param gitLogResult - Forwarded to `calculateGitLogMetrics`.
 * @param overrideDeps - Optional replacements for entries of `defaultDeps`.
 * @returns {Promise<object>} Totals (`totalFiles`, `totalCharacters`,
 *   `totalTokens`), per-path `fileCharCounts` / `fileTokenCounts`, and the git
 *   token counts.
 */
export const calculateMetrics = async (
  processedFiles,
  outputPromise,
  progressCallback,
  config,
  gitDiffResult,
  gitLogResult,
  overrideDeps = {},
) => {
  const deps = { ...defaultDeps, ...overrideDeps };
  progressCallback('Calculating metrics...');

  const taskRunner = deps.taskRunner ?? spawnMetricsTaskRunner(processedFiles.length);

  try {
    const filePaths = processedFiles.map(({ path }) => path);
    const { encoding } = config.tokenCount;

    const fileMetricsPromise = deps.calculateFileMetrics(
      processedFiles,
      filePaths,
      encoding,
      progressCallback,
      { taskRunner },
    );
    const gitDiffMetricsPromise = deps.calculateGitDiffMetrics(config, gitDiffResult, { taskRunner });
    const gitLogMetricsPromise = deps.calculateGitLogMetrics(config, gitLogResult, { taskRunner });

    // These promises are only awaited after `outputPromise` resolves. The
    // no-op handlers presumably exist so that a rejection arriving in the
    // meantime is not reported as unhandled; the rejection itself still
    // propagates through the `Promise.all` further down.
    for (const pending of [fileMetricsPromise, gitDiffMetricsPromise, gitLogMetricsPromise]) {
      pending.catch(() => { });
    }

    const resolvedOutput = await outputPromise;
    const outputParts = Array.isArray(resolvedOutput) ? resolvedOutput : [resolvedOutput];

    // Fast path is attempted only for a single, non-null output part under a
    // configuration that allows it; `outputWrapper` stays null otherwise.
    let outputWrapper = null;
    if (canUseFastOutputTokenPath(config) && outputParts.length === 1) {
      const soleOutput = outputParts[0];
      if (soleOutput !== null) {
        outputWrapper = extractOutputWrapper(soleOutput, processedFiles);
        if (outputWrapper === null) {
          logger.trace('Fast-path unavailable, falling back to full output tokenization');
        }
      }
    }

    // Fast path: tokenize only the wrapper and add the per-file token counts.
    const countOutputViaWrapper = async () => {
      const wrapperTokensPromise = runTokenCount(taskRunner, {
        content: outputWrapper,
        encoding,
      });
      const [allFileMetrics, wrapperTokens] = await Promise.all([fileMetricsPromise, wrapperTokensPromise]);
      const fileTokensSum = allFileMetrics.reduce((acc, metric) => acc + metric.tokenCount, 0);
      logger.trace(`Fast-path output tokens: files=${fileTokensSum}, wrapper=${wrapperTokens} (${outputWrapper.length} chars)`);
      return [fileTokensSum + wrapperTokens];
    };

    // Fallback: tokenize every output part; split parts get numbered paths.
    const countOutputPerPart = () =>
      Promise.all(
        outputParts.map((part, index) => {
          const partPath =
            outputParts.length > 1
              ? buildSplitOutputFilePath(config.output.filePath, index + 1)
              : config.output.filePath;
          return deps.calculateOutputMetrics(part, encoding, partPath, { taskRunner });
        }),
      );

    const outputMetricsPromise = outputWrapper !== null ? countOutputViaWrapper() : countOutputPerPart();

    const [fileMetrics, outputTokenCounts, gitDiffTokenCount, gitLogMetrics] = await Promise.all([
      fileMetricsPromise,
      outputMetricsPromise,
      gitDiffMetricsPromise,
      gitLogMetricsPromise,
    ]);

    const totalTokens = outputTokenCounts.reduce((acc, count) => acc + count, 0);
    const totalFiles = processedFiles.length;
    const totalCharacters = outputParts.reduce((acc, part) => acc + part.length, 0);

    const fileCharCounts = {};
    for (const { path, content } of processedFiles) {
      fileCharCounts[path] = content.length;
    }

    const fileTokenCounts = {};
    for (const { path, tokenCount } of fileMetrics) {
      fileTokenCounts[path] = tokenCount;
    }

    return {
      totalFiles,
      totalCharacters,
      totalTokens,
      fileCharCounts,
      fileTokenCounts,
      gitDiffTokenCount,
      gitLogTokenCount: gitLogMetrics.gitLogTokenCount,
    };
  } finally {
    // Only clean up a runner created here; a caller-supplied runner is left
    // for the caller to manage.
    const ownsTaskRunner = !deps.taskRunner;
    if (ownsTaskRunner) {
      await taskRunner.cleanup();
    }
  }
};