repomix
Version:
A tool to pack repository contents into a single file for AI consumption
117 lines (116 loc) • 4.09 kB
JavaScript
import { logger } from '../../shared/logger.js';
import { LanguageParser } from './languageParser.js';
// Module-wide LanguageParser instance. It starts out null, is created on demand by
// getLanguageParserSingleton, and is reset to null by cleanupLanguageParser.
let languageParserSingleton = null;
// Marker that parseFile places on its own line between two non-adjacent chunks
// in the string it returns.
export const CHUNK_SEPARATOR = '⋮----';
/**
 * Parses a file with the language-specific parser and query, and returns the
 * captured chunks as a single string.
 *
 * @param {string} fileContent - Full text of the file.
 * @param {string} filePath - Path used to guess the language.
 * @param {object} config - Passed through to the parse strategy via the context object.
 * @returns {Promise<string|undefined>} The captured chunks joined by a
 *   CHUNK_SEPARATOR line, or `undefined` when the language cannot be guessed
 *   from the path or the parser produces no tree.
 */
export const parseFile = async (fileContent, filePath, config) => {
  const languageParser = await getLanguageParserSingleton();

  const lines = fileContent.split('\n');
  // Splitting a string on '\n' yields at least one element, so this guard is
  // effectively never taken; it is kept as a defensive check.
  if (lines.length === 0) {
    return '';
  }

  const lang = languageParser.guessTheLang(filePath);
  if (lang === undefined) {
    return undefined;
  }

  const query = await languageParser.getQueryForLang(lang);
  const parser = await languageParser.getParserForLang(lang);

  // Handed to the strategy on every capture so it can track what it has
  // already emitted.
  const seenChunks = new Set();
  const collected = [];

  try {
    const tree = parser.parse(fileContent);
    if (!tree) {
      logger.debug(`Failed to parse file: ${filePath}`);
      return undefined;
    }

    const strategy = await languageParser.getStrategyForLang(lang);
    const context = { fileContent, lines, tree, query, config };

    // Visit captures in source order (sorted in place by starting row).
    const captures = query.captures(tree.rootNode);
    captures.sort((left, right) => left.node.startPosition.row - right.node.startPosition.row);

    for (const capture of captures) {
      const text = strategy.parseCapture(capture, lines, seenChunks, context);
      // The strategy returns null for captures that should not be emitted.
      if (text === null) {
        continue;
      }
      collected.push({
        content: text.trim(),
        startRow: capture.node.startPosition.row,
        endRow: capture.node.endPosition.row,
      });
    }
  } catch (error) {
    // Best effort: report the failure and still return whatever was collected
    // before the error occurred.
    logger.log(`Error parsing file: ${error}\n`);
  }

  const chunks = mergeAdjacentChunks(filterDuplicatedChunks(collected));
  const contents = chunks.map((chunk) => chunk.content);
  return contents.join(`\n${CHUNK_SEPARATOR}\n`).trim();
};
// Initialization currently in flight, shared by concurrent callers; null when
// no initialization is running.
let languageParserInitPromise = null;
/**
 * Returns the shared LanguageParser, creating and initializing it on first use.
 *
 * The instance is stored in `languageParserSingleton` only after `init()` has
 * resolved, so callers never receive a parser that is still initializing and a
 * failed `init()` does not leave a broken instance cached. Callers that arrive
 * while initialization is running wait on the same promise instead of starting
 * a second one.
 *
 * @returns {Promise<LanguageParser>} The initialized shared parser.
 * @throws Rejects with whatever `init()` rejected with; the next call retries.
 */
const getLanguageParserSingleton = async () => {
  if (languageParserSingleton) {
    return languageParserSingleton;
  }
  if (!languageParserInitPromise) {
    const initialize = async () => {
      const parser = new LanguageParser();
      await parser.init();
      // Publish only once init() succeeded.
      languageParserSingleton = parser;
      return parser;
    };
    languageParserInitPromise = initialize().finally(() => {
      // Clear the in-flight marker on success and on failure so that a failed
      // initialization can be retried by a later call.
      languageParserInitPromise = null;
    });
  }
  return languageParserInitPromise;
};
/**
 * Disposes the shared language parser, if one exists, and clears the module
 * reference to it.
 *
 * A failure inside `dispose()` is logged at debug level and not rethrown; the
 * reference is cleared either way.
 *
 * @returns {Promise<void>}
 */
export const cleanupLanguageParser = async () => {
  // Nothing was ever created (or it was already cleaned up).
  if (!languageParserSingleton) {
    return;
  }
  try {
    await languageParserSingleton.dispose();
    logger.debug('Language parser singleton deleted');
  } catch (disposeError) {
    logger.debug('Language parser dispose threw', disposeError);
  } finally {
    // Drop the reference even when dispose failed so the next request builds
    // a new parser.
    languageParserSingleton = null;
  }
};
/**
 * Keeps one chunk per starting row: the one with the longest content.
 * When several chunks on the same row have equally long content, the one that
 * appears first in the input is kept.
 *
 * @param {Array<{content: string, startRow: number, endRow: number}>} chunks
 * @returns {Array<{content: string, startRow: number, endRow: number}>} A new
 *   array of the surviving chunks, ordered by ascending startRow.
 */
const filterDuplicatedChunks = (chunks) => {
  const longestByRow = new Map();
  for (const candidate of chunks) {
    const incumbent = longestByRow.get(candidate.startRow);
    // Replace only on strictly longer content so that ties keep the earliest chunk.
    if (incumbent === undefined || candidate.content.length > incumbent.content.length) {
      longestByRow.set(candidate.startRow, candidate);
    }
  }
  return [...longestByRow.values()].sort((left, right) => left.startRow - right.startRow);
};
/**
 * Joins chunks that sit on directly consecutive rows into a single chunk.
 *
 * A chunk is folded into the previous one when its startRow is exactly one
 * past the previous endRow; the contents are joined with a newline and the
 * merged chunk spans from the first startRow to the last endRow.
 *
 * @param {Array<{content: string, startRow: number, endRow: number}>} chunks
 *   Chunks in the order they should be considered.
 * @returns {Array<{content: string, startRow: number, endRow: number}>} The
 *   merged chunks. Inputs with zero or one element are returned as-is.
 */
const mergeAdjacentChunks = (chunks) => {
  if (chunks.length < 2) {
    return chunks;
  }
  const groups = [];
  for (const chunk of chunks) {
    const open = groups[groups.length - 1];
    if (open !== undefined && open.endRow + 1 === chunk.startRow) {
      // Directly follows the open group: extend it.
      open.parts.push(chunk.content);
      open.endRow = chunk.endRow;
    } else {
      groups.push({ parts: [chunk.content], startRow: chunk.startRow, endRow: chunk.endRow });
    }
  }
  return groups.map((group) => ({
    content: group.parts.join('\n'),
    startRow: group.startRow,
    endRow: group.endRow,
  }));
};