UNPKG

@husniadil/codebase-analyzer

Version:

A comprehensive tool for analyzing and summarizing codebases, supporting multiple file types and providing detailed file tree views and context extraction.

247 lines (242 loc) 7.43 kB
// src/codebase-analyzer.ts import { promises as fs } from "node:fs"; import * as path from "node:path"; import { encodingForModel } from "js-tiktoken"; // src/util.ts var formatSize = (bytes) => { if (bytes === 0) return "0 bytes"; const units = ["bytes", "KB", "MB", "GB", "TB"]; const size = Math.abs(bytes); const index = Math.floor(Math.log(size) / Math.log(1024)); const formattedSize = (size / 1024 ** index).toPrecision(3); return `${Math.sign(bytes) * Number(formattedSize)} ${units[index]}`; }; // src/codebase-analyzer.ts var CodebaseAnalyzer = class { directory; relevantExtensions; maxFileSize; enableTokenCounting; maxTokens; ignorePatterns; ignoreFilesWithNoExtension; memoryLimitMB; processedFiles = 0; totalFiles = 0; totalSize = 0; constructor(config = {}) { const { directory = ".", relevantExtensions = [ ".js", ".ts", ".jsx", ".tsx", ".json", ".java", ".kt", ".swift", ".c", ".cpp", ".h", ".go", ".py", ".rb", ".php", ".html", ".css", ".scss", ".less" ], maxFileSize = 1e5, enableTokenCounting = false, maxTokens = 1e5, ignorePatterns = [ "node_modules", "vendor", "dist", "build", "public", "android", "fastlane", "ios", "tmp", "package.lock.json" ], ignoreFilesWithNoExtension = true, memoryLimitMB = 64 } = config; if (directory.trim() === "") { throw new Error("directory must not be empty."); } this.directory = path.resolve(directory); this.relevantExtensions = relevantExtensions; this.maxFileSize = maxFileSize; this.enableTokenCounting = enableTokenCounting; this.maxTokens = maxTokens; this.ignorePatterns = ignorePatterns; this.ignoreFilesWithNoExtension = ignoreFilesWithNoExtension; this.memoryLimitMB = memoryLimitMB; } async shouldIgnore(filePath) { const relativePath = path.relative(this.directory, filePath); const patterns = this.ignorePatterns.map((pattern) => new RegExp(pattern)); return relativePath.startsWith(".") || patterns.some((pattern) => pattern.test(relativePath) || pattern.test(path.basename(filePath))); } hasExtension(filePath) { return 
path.extname(filePath) !== ""; } async isRelevantFile(filePath) { if (this.ignoreFilesWithNoExtension && !this.hasExtension(filePath)) { return false; } try { const stats = await fs.stat(filePath); if (!stats.isFile()) return false; return stats.size <= this.maxFileSize && this.relevantExtensions.some((ext) => filePath.endsWith(ext)); } catch (error) { console.warn(`Error checking relevance of file ${filePath}:`, error); return false; } } async gatherFiles(dir) { const files = []; try { const items = await fs.readdir(dir); const filePromises = items.map(async (item) => { const fullPath = path.join(dir, item); if (await this.shouldIgnore(fullPath)) return null; const stats = await fs.stat(fullPath); const node = { name: item, path: fullPath, size: stats.size, isDirectory: stats.isDirectory(), children: [] }; if (stats.isDirectory()) { node.children = await this.gatherFiles(fullPath); if (node.children.length > 0) { return node; } } else if (await this.isRelevantFile(fullPath)) { this.totalSize += stats.size; return node; } return null; }); const resolvedFiles = await Promise.all(filePromises); for (const file of resolvedFiles) { if (file) { files.push(file); } } } catch (error) { console.warn(`Error gathering files in directory ${dir}:`, error); } return files; } async countTotalFiles(nodes) { const counts = await Promise.all( nodes.map(async (node) => { if (node.isDirectory) { return await this.countTotalFiles(node.children); } else { return 1; } }) ); return counts.reduce((acc, count) => acc + count, 0); } checkMemoryUsage() { const usedMemoryMB = process.memoryUsage().heapUsed / 1024 / 1024; if (usedMemoryMB > this.memoryLimitMB) { console.error( `Codebase ~ checkMemoryUsage ~ memory limit exceeded: ${usedMemoryMB.toFixed( 2 )} MB used, limit is ${this.memoryLimitMB} MB.` ); throw new Error("Your codebase is too large to process. 
Please try again with a smaller one."); } } async processFileTree(nodes, context) { for (const node of nodes) { if (node.isDirectory) { await this.processFileTree(node.children, context); } else { try { const content = await fs.readFile(node.path, "utf-8"); context.push(`File: ${node.path} (${formatSize(node.size)}) ${content.trim()} ${"=".repeat(20)} `); this.processedFiles++; } catch (error) { console.warn("Codebase ~ gatherContext ~ error:", node.path, error); } } this.checkMemoryUsage(); } } async gatherContext(fileTree) { this.totalFiles = await this.countTotalFiles(fileTree); const context = []; await this.processFileTree(fileTree, context); if (context.length === 0) { throw new Error("No relevant files found."); } return context.join("\n"); } truncateContext(context) { const tokens = context.split(/\s+/); if (this.enableTokenCounting ? tokens.length > this.maxTokens : false) { console.warn("Context truncated due to token limit."); return tokens.slice(0, this.maxTokens).join(" "); } return context; } async countTokens(code) { const encoder = encodingForModel("gpt-4o"); const tokens = encoder.encode(code); return tokens.length; } async buildTreeView(nodes, indent = "") { let treeView = ""; for (const [index, node] of nodes.entries()) { const isLastItem = index === nodes.length - 1; const size = node.isDirectory ? "" : ` ${formatSize(node.size)}`; treeView += `${indent}${isLastItem ? "\u2514\u2500\u2500" : "\u251C\u2500\u2500"} ${node.isDirectory ? "\u{1F4C1}" : "\u2630"} ${node.name}${size} `; if (node.isDirectory) { treeView += await this.buildTreeView(node.children, `${indent}${isLastItem ? " " : "\u2502 "}`); } } return treeView; } async analyze() { try { const fileNode = await this.gatherFiles(this.directory); const context = await this.gatherContext(fileNode); const truncatedContext = this.truncateContext(context); const treeView = await this.buildTreeView(fileNode); const tokenCount = this.enableTokenCounting ? 
await this.countTokens(truncatedContext) : 0; return { context: truncatedContext, tokenCount, treeView, files: { totalSize: this.totalSize, totalCount: this.totalFiles, processedCount: this.processedFiles } }; } catch (error) { console.error("Error generating summary:", error); throw error; } } }; export { CodebaseAnalyzer, formatSize };