UNPKG

repomix

Version:

A tool to pack repository contents into a single file for AI consumption

117 lines (116 loc) 4.09 kB
import { logger } from '../../shared/logger.js';
import { LanguageParser } from './languageParser.js';

// Fully initialized parser shared by all parseFile calls; null until init() has succeeded.
let languageParserSingleton = null;
// Promise of the initialization currently in flight, shared by concurrent callers; null otherwise.
let languageParserInitPromise = null;

/** Line placed between non-adjacent chunks in the string returned by parseFile. */
export const CHUNK_SEPARATOR = '⋮----';

/**
 * Extracts the chunks captured by the language query for a file and joins them into one string.
 *
 * @param {string} fileContent - Full text of the file.
 * @param {string} filePath - Path handed to the language parser to guess the language.
 * @param {object} config - Passed through untouched to the parse strategy via the context object.
 * @returns {Promise<string|undefined>} Captured chunks joined with CHUNK_SEPARATOR (possibly an
 *   empty string), or undefined when the language is not recognized or the parser returns no tree.
 *   An exception thrown while parsing or capturing is logged and whatever was captured before it
 *   is still returned.
 */
export const parseFile = async (fileContent, filePath, config) => {
  const languageParser = await getLanguageParserSingleton();
  // split() always yields at least one element, so no emptiness check is needed here.
  const lines = fileContent.split('\n');

  const lang = languageParser.guessTheLang(filePath);
  if (lang === undefined) {
    return undefined;
  }

  const query = await languageParser.getQueryForLang(lang);
  const parser = await languageParser.getParserForLang(lang);
  const processedChunks = new Set();
  const capturedChunks = [];
  let tree;

  try {
    tree = parser.parse(fileContent);
    if (!tree) {
      logger.debug(`Failed to parse file: ${filePath}`);
      return undefined;
    }

    const parseStrategy = await languageParser.getStrategyForLang(lang);
    const context = {
      fileContent,
      lines,
      tree,
      query,
      config,
    };

    // Visit captures top-to-bottom so chunks are collected in source order.
    const captures = query.captures(tree.rootNode);
    captures.sort((a, b) => a.node.startPosition.row - b.node.startPosition.row);

    for (const capture of captures) {
      const capturedChunkContent = parseStrategy.parseCapture(capture, lines, processedChunks, context);
      if (capturedChunkContent !== null) {
        capturedChunks.push({
          content: capturedChunkContent.trim(),
          startRow: capture.node.startPosition.row,
          endRow: capture.node.endPosition.row,
        });
      }
    }
  } catch (error) {
    // Best effort: keep the chunks captured so far instead of failing the whole file.
    logger.log(`Error parsing file: ${error}\n`);
  } finally {
    // Only strings and row numbers are kept past this point, so the tree can be released.
    // NOTE(review): assumes a web-tree-sitter style tree whose delete() frees its memory;
    // the optional call is a no-op for trees without that method. Confirm against LanguageParser.
    tree?.delete?.();
  }

  const filteredChunks = filterDuplicatedChunks(capturedChunks);
  const mergedChunks = mergeAdjacentChunks(filteredChunks);

  return mergedChunks
    .map((chunk) => chunk.content)
    .join(`\n${CHUNK_SEPARATOR}\n`)
    .trim();
};

/**
 * Creates and initializes a LanguageParser, publishing it as the singleton only once init()
 * has succeeded so that no caller can observe a half-initialized instance.
 *
 * @returns {Promise<LanguageParser>} The initialized parser.
 */
const createLanguageParser = async () => {
  const languageParser = new LanguageParser();
  await languageParser.init();
  languageParserSingleton = languageParser;
  return languageParser;
};

/**
 * Returns the shared LanguageParser, initializing it on first use.
 *
 * Concurrent callers share a single in-flight initialization. If that initialization fails,
 * the rejection is propagated to every waiting caller and the next call starts a fresh attempt.
 *
 * @returns {Promise<LanguageParser>} The initialized shared parser.
 */
const getLanguageParserSingleton = async () => {
  if (languageParserSingleton) {
    return languageParserSingleton;
  }

  if (!languageParserInitPromise) {
    const pending = createLanguageParser();
    languageParserInitPromise = pending;

    // Forget the in-flight promise once it settles; on failure this is what allows a retry.
    const forgetPending = () => {
      if (languageParserInitPromise === pending) {
        languageParserInitPromise = null;
      }
    };
    pending.then(forgetPending, forgetPending);
  }

  return languageParserInitPromise;
};

/**
 * Disposes the shared LanguageParser, if one has been initialized, and clears the singleton so
 * that the next parseFile call creates a new one. A failure inside dispose() is logged at debug
 * level and does not propagate.
 *
 * @returns {Promise<void>}
 */
export const cleanupLanguageParser = async () => {
  if (!languageParserSingleton) {
    return;
  }

  try {
    await languageParserSingleton.dispose();
    logger.debug('Language parser singleton deleted');
  } catch (err) {
    logger.debug('Language parser dispose threw', err);
  } finally {
    languageParserSingleton = null;
  }
};

/**
 * Keeps one chunk per start row: the one with the longest content, or the earliest such chunk
 * when several share the maximum length.
 *
 * @param {Array<{content: string, startRow: number, endRow: number}>} chunks
 * @returns {Array<{content: string, startRow: number, endRow: number}>} Surviving chunks ordered
 *   by ascending startRow.
 */
const filterDuplicatedChunks = (chunks) => {
  const longestByStartRow = new Map();

  for (const chunk of chunks) {
    const kept = longestByStartRow.get(chunk.startRow);
    // Strict comparison so the first chunk seen wins a tie.
    if (kept === undefined || chunk.content.length > kept.content.length) {
      longestByStartRow.set(chunk.startRow, chunk);
    }
  }

  return [...longestByStartRow.values()].sort((a, b) => a.startRow - b.startRow);
};

/**
 * Merges runs of chunks in which each chunk starts on the row right after the previous chunk
 * ends, joining their contents with a newline.
 *
 * @param {Array<{content: string, startRow: number, endRow: number}>} chunks - Expected in
 *   ascending startRow order, as produced by filterDuplicatedChunks.
 * @returns {Array<{content: string, startRow: number, endRow: number}>} The merged chunks; the
 *   input array itself is returned when it holds zero or one chunk.
 */
const mergeAdjacentChunks = (chunks) => {
  if (chunks.length <= 1) {
    return chunks;
  }

  const merged = [];
  let contentParts = [chunks[0].content];
  let startRow = chunks[0].startRow;
  let endRow = chunks[0].endRow;

  for (let i = 1; i < chunks.length; i++) {
    const current = chunks[i];

    if (endRow + 1 === current.startRow) {
      // Directly follows the current run: extend it.
      contentParts.push(current.content);
      endRow = current.endRow;
    } else {
      // Gap (or overlap): close the current run and start a new one.
      merged.push({ content: contentParts.join('\n'), startRow, endRow });
      contentParts = [current.content];
      startRow = current.startRow;
      endRow = current.endRow;
    }
  }

  merged.push({ content: contentParts.join('\n'), startRow, endRow });
  return merged;
};