@husniadil/codebase-analyzer
Version:
A comprehensive tool for analyzing and summarizing codebases, supporting multiple file types and providing detailed file tree views and context extraction.
247 lines (242 loc) • 7.43 kB
JavaScript
// src/codebase-analyzer.ts
import { promises as fs } from "node:fs";
import * as path from "node:path";
import { encodingForModel } from "js-tiktoken";
// src/util.ts
/**
 * Formats a byte count as a short human-readable string such as "1.5 KB".
 *
 * The magnitude is scaled by powers of 1024 and rounded to three significant
 * digits; the sign of the input is preserved. The unit index is clamped to the
 * available units, so fractional values below one byte are reported in "bytes"
 * and values of 1024 TB or more are reported in "TB" instead of producing an
 * "undefined" unit.
 *
 * @param {number} bytes - Size in bytes (may be negative or fractional).
 * @returns {string} The formatted size, e.g. "0 bytes", "512 bytes", "-2 KB".
 */
var formatSize = (bytes) => {
  if (bytes === 0) return "0 bytes";
  const units = ["bytes", "KB", "MB", "GB", "TB"];
  const size = Math.abs(bytes);
  const rawIndex = Math.floor(Math.log(size) / Math.log(1024));
  // rawIndex is negative for sizes below 1, exceeds the table for huge sizes,
  // and is NaN for NaN input; the comparison below maps all of those to a
  // valid slot (NaN > 0 is false, so NaN falls back to "bytes").
  const index = rawIndex > 0 ? Math.min(rawIndex, units.length - 1) : 0;
  const formattedSize = (size / 1024 ** index).toPrecision(3);
  // Number(...) strips trailing zeros and exponent notation from toPrecision.
  return `${Math.sign(bytes) * Number(formattedSize)} ${units[index]}`;
};
// src/codebase-analyzer.ts
var CodebaseAnalyzer = class {
directory;
relevantExtensions;
maxFileSize;
enableTokenCounting;
maxTokens;
ignorePatterns;
ignoreFilesWithNoExtension;
memoryLimitMB;
processedFiles = 0;
totalFiles = 0;
totalSize = 0;
constructor(config = {}) {
const {
directory = ".",
relevantExtensions = [
".js",
".ts",
".jsx",
".tsx",
".json",
".java",
".kt",
".swift",
".c",
".cpp",
".h",
".go",
".py",
".rb",
".php",
".html",
".css",
".scss",
".less"
],
maxFileSize = 1e5,
enableTokenCounting = false,
maxTokens = 1e5,
ignorePatterns = [
"node_modules",
"vendor",
"dist",
"build",
"public",
"android",
"fastlane",
"ios",
"tmp",
"package.lock.json"
],
ignoreFilesWithNoExtension = true,
memoryLimitMB = 64
} = config;
if (directory.trim() === "") {
throw new Error("directory must not be empty.");
}
this.directory = path.resolve(directory);
this.relevantExtensions = relevantExtensions;
this.maxFileSize = maxFileSize;
this.enableTokenCounting = enableTokenCounting;
this.maxTokens = maxTokens;
this.ignorePatterns = ignorePatterns;
this.ignoreFilesWithNoExtension = ignoreFilesWithNoExtension;
this.memoryLimitMB = memoryLimitMB;
}
async shouldIgnore(filePath) {
const relativePath = path.relative(this.directory, filePath);
const patterns = this.ignorePatterns.map((pattern) => new RegExp(pattern));
return relativePath.startsWith(".") || patterns.some((pattern) => pattern.test(relativePath) || pattern.test(path.basename(filePath)));
}
hasExtension(filePath) {
return path.extname(filePath) !== "";
}
async isRelevantFile(filePath) {
if (this.ignoreFilesWithNoExtension && !this.hasExtension(filePath)) {
return false;
}
try {
const stats = await fs.stat(filePath);
if (!stats.isFile()) return false;
return stats.size <= this.maxFileSize && this.relevantExtensions.some((ext) => filePath.endsWith(ext));
} catch (error) {
console.warn(`Error checking relevance of file ${filePath}:`, error);
return false;
}
}
async gatherFiles(dir) {
const files = [];
try {
const items = await fs.readdir(dir);
const filePromises = items.map(async (item) => {
const fullPath = path.join(dir, item);
if (await this.shouldIgnore(fullPath)) return null;
const stats = await fs.stat(fullPath);
const node = {
name: item,
path: fullPath,
size: stats.size,
isDirectory: stats.isDirectory(),
children: []
};
if (stats.isDirectory()) {
node.children = await this.gatherFiles(fullPath);
if (node.children.length > 0) {
return node;
}
} else if (await this.isRelevantFile(fullPath)) {
this.totalSize += stats.size;
return node;
}
return null;
});
const resolvedFiles = await Promise.all(filePromises);
for (const file of resolvedFiles) {
if (file) {
files.push(file);
}
}
} catch (error) {
console.warn(`Error gathering files in directory ${dir}:`, error);
}
return files;
}
async countTotalFiles(nodes) {
const counts = await Promise.all(
nodes.map(async (node) => {
if (node.isDirectory) {
return await this.countTotalFiles(node.children);
} else {
return 1;
}
})
);
return counts.reduce((acc, count) => acc + count, 0);
}
checkMemoryUsage() {
const usedMemoryMB = process.memoryUsage().heapUsed / 1024 / 1024;
if (usedMemoryMB > this.memoryLimitMB) {
console.error(
`Codebase ~ checkMemoryUsage ~ memory limit exceeded: ${usedMemoryMB.toFixed(
2
)} MB used, limit is ${this.memoryLimitMB} MB.`
);
throw new Error("Your codebase is too large to process. Please try again with a smaller one.");
}
}
async processFileTree(nodes, context) {
for (const node of nodes) {
if (node.isDirectory) {
await this.processFileTree(node.children, context);
} else {
try {
const content = await fs.readFile(node.path, "utf-8");
context.push(`File: ${node.path} (${formatSize(node.size)})
${content.trim()}
${"=".repeat(20)}
`);
this.processedFiles++;
} catch (error) {
console.warn("Codebase ~ gatherContext ~ error:", node.path, error);
}
}
this.checkMemoryUsage();
}
}
async gatherContext(fileTree) {
this.totalFiles = await this.countTotalFiles(fileTree);
const context = [];
await this.processFileTree(fileTree, context);
if (context.length === 0) {
throw new Error("No relevant files found.");
}
return context.join("\n");
}
truncateContext(context) {
const tokens = context.split(/\s+/);
if (this.enableTokenCounting ? tokens.length > this.maxTokens : false) {
console.warn("Context truncated due to token limit.");
return tokens.slice(0, this.maxTokens).join(" ");
}
return context;
}
async countTokens(code) {
const encoder = encodingForModel("gpt-4o");
const tokens = encoder.encode(code);
return tokens.length;
}
async buildTreeView(nodes, indent = "") {
let treeView = "";
for (const [index, node] of nodes.entries()) {
const isLastItem = index === nodes.length - 1;
const size = node.isDirectory ? "" : ` ${formatSize(node.size)}`;
treeView += `${indent}${isLastItem ? "\u2514\u2500\u2500" : "\u251C\u2500\u2500"} ${node.isDirectory ? "\u{1F4C1}" : "\u2630"} ${node.name}${size}
`;
if (node.isDirectory) {
treeView += await this.buildTreeView(node.children, `${indent}${isLastItem ? " " : "\u2502 "}`);
}
}
return treeView;
}
async analyze() {
try {
const fileNode = await this.gatherFiles(this.directory);
const context = await this.gatherContext(fileNode);
const truncatedContext = this.truncateContext(context);
const treeView = await this.buildTreeView(fileNode);
const tokenCount = this.enableTokenCounting ? await this.countTokens(truncatedContext) : 0;
return {
context: truncatedContext,
tokenCount,
treeView,
files: {
totalSize: this.totalSize,
totalCount: this.totalFiles,
processedCount: this.processedFiles
}
};
} catch (error) {
console.error("Error generating summary:", error);
throw error;
}
}
};
// Named exports of this module: the analyzer class and the size-formatting helper.
export {
CodebaseAnalyzer,
formatSize
};