repomix
Version:
A tool to pack repository contents into a single file for AI consumption
124 lines (123 loc) • 5.87 kB
JavaScript
import { logger } from '../../shared/logger.js';
import { getWorkerThreadCount, initTaskRunner } from '../../shared/processConcurrency.js';
import { buildSplitOutputFilePath } from '../output/outputSplit.js';
import { calculateFileMetrics } from './calculateFileMetrics.js';
import { calculateGitDiffMetrics } from './calculateGitDiffMetrics.js';
import { calculateGitLogMetrics } from './calculateGitLogMetrics.js';
import { calculateOutputMetrics } from './calculateOutputMetrics.js';
import { runTokenCount } from './metricsWorkerRunner.js';
/**
 * Creates a 'calculateMetrics' task runner on worker threads and immediately
 * submits one empty-content task per worker thread as a warm-up.
 *
 * @param {number} numOfTasks - Forwarded to `initTaskRunner` and `getWorkerThreadCount`.
 * @param {*} encoding - Passed along with each warm-up task.
 * @returns {{ taskRunner: object, warmupPromise: Promise<Array> }} The runner and a
 *   promise that settles once every warm-up task has settled.
 */
export const createMetricsTaskRunner = (numOfTasks, encoding) => {
    const runnerOptions = {
        numOfTasks,
        workerType: 'calculateMetrics',
        runtime: 'worker_threads',
    };
    const taskRunner = initTaskRunner(runnerOptions);

    const { maxThreads } = getWorkerThreadCount(numOfTasks);

    // A failed warm-up task is mapped to 0 rather than rejected, so
    // `warmupPromise` never rejects because of an individual warm-up failure.
    const warmOneWorker = () => taskRunner.run({ content: '', encoding }).catch(() => 0);
    const warmupPromise = Promise.all(Array.from({ length: maxThreads }, warmOneWorker));

    return { taskRunner, warmupPromise };
};
// Default implementations of the collaborators used by calculateMetrics.
// calculateMetrics spreads this object first and `overrideDeps` second, so any
// key supplied by the caller replaces the default listed here.
const defaultDeps = {
calculateFileMetrics,
calculateOutputMetrics,
calculateGitDiffMetrics,
calculateGitLogMetrics,
// Left undefined by default: calculateMetrics then creates its own task runner
// and cleans it up itself when it finishes.
taskRunner: undefined,
};
/**
 * Removes every file's content from `output`, scanning left to right, and
 * returns whatever text is left over (the "wrapper" around the files).
 *
 * Files are searched for in the order given, each search starting where the
 * previous match ended. Files with empty content are skipped.
 *
 * @param {string} output - The full output text.
 * @param {Array<{content: string}>} processedFilesInOutputOrder - Files in the
 *   order their content appears in `output`.
 * @returns {string|null} The output with all file contents removed, or `null`
 *   when some non-empty file content is not found at or after the current
 *   scan position.
 */
export const extractOutputWrapper = (output, processedFilesInOutputOrder) => {
    const pieces = [];
    let position = 0;

    for (const { content } of processedFilesInOutputOrder) {
        if (content.length > 0) {
            const foundAt = output.indexOf(content, position);
            if (foundAt === -1) {
                // Content missing or out of order: the wrapper cannot be isolated.
                return null;
            }
            pieces.push(output.slice(position, foundAt));
            position = foundAt + content.length;
        }
    }

    // Everything after the last matched file belongs to the wrapper as well.
    pieces.push(output.slice(position));
    return pieces.join('');
};
/**
 * Decides whether the output token count may be derived via the fast path.
 *
 * The fast path is allowed only when `config.output.splitOutput` is exactly
 * `undefined`, `config.output.parsableStyle` is falsy, and
 * `config.output.style` is one of 'xml', 'markdown' or 'plain'.
 *
 * @param {{output: {splitOutput?: *, parsableStyle?: *, style?: string}}} config
 * @returns {boolean} `true` when all three conditions hold.
 */
export const canUseFastOutputTokenPath = (config) => {
    const { splitOutput, parsableStyle, style } = config.output;
    const isSingleUnparsedOutput = splitOutput === undefined && !parsableStyle;
    return isSingleUnparsedOutput && ['xml', 'markdown', 'plain'].includes(style);
};
/**
 * Computes character and token metrics for the processed files, the generated
 * output, and the git diff / git log results.
 *
 * File, git-diff and git-log metrics are started before the output is awaited.
 * Output tokens are then counted in one of two ways:
 *  - fast path: when `canUseFastOutputTokenPath(config)` holds, there is a
 *    single output part, and `extractOutputWrapper` can isolate the wrapper,
 *    only the wrapper is tokenized and added to the sum of per-file token counts;
 *  - otherwise every output part is passed to `deps.calculateOutputMetrics`.
 *
 * @param {Array<{path: string, content: string}>} processedFiles - Files to measure.
 * @param {Promise<*>|*} outputPromise - Awaited; an array result is treated as a
 *   list of output parts, anything else as a single part.
 * @param {Function} progressCallback - Called with a progress message.
 * @param {object} config - Reads `tokenCount.encoding` and `output.*`.
 * @param {*} gitDiffResult - Forwarded to `deps.calculateGitDiffMetrics`.
 * @param {*} gitLogResult - Forwarded to `deps.calculateGitLogMetrics`.
 * @param {object} [overrideDeps={}] - Replacements for entries of `defaultDeps`.
 *   A truthy `taskRunner` supplied here is used as-is and is not cleaned up.
 * @returns {Promise<object>} Totals, per-file character and token counts keyed by
 *   path, and the git diff / git log token counts.
 */
export const calculateMetrics = async (processedFiles, outputPromise, progressCallback, config, gitDiffResult, gitLogResult, overrideDeps = {}) => {
    const deps = { ...defaultDeps, ...overrideDeps };
    progressCallback('Calculating metrics...');

    // Only a runner created here is torn down here; an injected one is left alone.
    const shouldCleanupRunner = !deps.taskRunner;
    const taskRunner = deps.taskRunner ??
        initTaskRunner({
            numOfTasks: processedFiles.length,
            workerType: 'calculateMetrics',
            runtime: 'worker_threads',
        });

    try {
        const targetPaths = processedFiles.map((file) => file.path);

        // Start the work that does not depend on the output before awaiting it.
        const pendingFileMetrics = deps.calculateFileMetrics(processedFiles, targetPaths, config.tokenCount.encoding, progressCallback, { taskRunner });
        const pendingGitDiffMetrics = deps.calculateGitDiffMetrics(config, gitDiffResult, { taskRunner });
        const pendingGitLogMetrics = deps.calculateGitLogMetrics(config, gitLogResult, { taskRunner });

        // Attach no-op handlers so a rejection that happens while the output is
        // still being awaited is not reported as unhandled; the real errors
        // still surface through the Promise.all calls below.
        for (const pending of [pendingFileMetrics, pendingGitDiffMetrics, pendingGitLogMetrics]) {
            pending.catch(() => { });
        }

        const resolvedOutput = await outputPromise;
        const outputParts = Array.isArray(resolvedOutput) ? resolvedOutput : [resolvedOutput];

        const isFastPathCandidate = canUseFastOutputTokenPath(config) && outputParts.length === 1;
        const singleOutput = isFastPathCandidate ? outputParts[0] : null;
        let outputWrapper = null;
        if (singleOutput !== null) {
            outputWrapper = extractOutputWrapper(singleOutput, processedFiles);
            if (outputWrapper === null) {
                logger.trace('Fast-path unavailable, falling back to full output tokenization');
            }
        }

        // Fast path: tokenize only the wrapper and add the per-file token counts.
        const countTokensViaWrapper = async () => {
            const pendingWrapperTokens = runTokenCount(taskRunner, {
                content: outputWrapper,
                encoding: config.tokenCount.encoding,
            });
            const [perFileMetrics, wrapperTokens] = await Promise.all([pendingFileMetrics, pendingWrapperTokens]);
            const fileTokensSum = perFileMetrics.reduce((sum, f) => sum + f.tokenCount, 0);
            logger.trace(`Fast-path output tokens: files=${fileTokensSum}, wrapper=${wrapperTokens} (${outputWrapper.length} chars)`);
            return [fileTokensSum + wrapperTokens];
        };

        // Fallback: tokenize every output part in full.
        const countTokensPerPart = () => Promise.all(outputParts.map((part, index) => {
            let partPath = config.output.filePath;
            if (outputParts.length > 1) {
                partPath = buildSplitOutputFilePath(config.output.filePath, index + 1);
            }
            return deps.calculateOutputMetrics(part, config.tokenCount.encoding, partPath, { taskRunner });
        }));

        const pendingOutputTokens = outputWrapper !== null ? countTokensViaWrapper() : countTokensPerPart();

        const [fileMetrics, outputTokenCounts, gitDiffTokenCount, gitLogMetrics] = await Promise.all([
            pendingFileMetrics,
            pendingOutputTokens,
            pendingGitDiffMetrics,
            pendingGitLogMetrics,
        ]);

        const totalTokens = outputTokenCounts.reduce((sum, count) => sum + count, 0);
        const totalFiles = processedFiles.length;
        const totalCharacters = outputParts.reduce((sum, part) => sum + part.length, 0);

        const fileCharCounts = {};
        for (const { path, content } of processedFiles) {
            fileCharCounts[path] = content.length;
        }

        const fileTokenCounts = {};
        for (const { path, tokenCount } of fileMetrics) {
            fileTokenCounts[path] = tokenCount;
        }

        return {
            totalFiles,
            totalCharacters,
            totalTokens,
            fileCharCounts,
            fileTokenCounts,
            gitDiffTokenCount,
            gitLogTokenCount: gitLogMetrics.gitLogTokenCount,
        };
    }
    finally {
        if (shouldCleanupRunner) {
            await taskRunner.cleanup();
        }
    }
};