// codedetective
// Version:
// AI-powered tool to analyze codebases, reconstruct development timelines, and suggest where to resume work
// 385 lines • 11.9 kB
// JavaScript
import fs from 'fs';
import path from 'path';
import { glob } from 'glob';
// Display names for known file types, keyed by lowercase extension without the dot.
const FILE_TYPES = {
  // Programming languages
  'js': 'JavaScript',
  'ts': 'TypeScript',
  'jsx': 'React JSX',
  'tsx': 'React TSX',
  'py': 'Python',
  'rb': 'Ruby',
  'php': 'PHP',
  'java': 'Java',
  'c': 'C',
  'cpp': 'C++',
  'cs': 'C#',
  'go': 'Go',
  'rs': 'Rust',
  'swift': 'Swift',
  'kt': 'Kotlin',
  'scala': 'Scala',
  'r': 'R',
  'jl': 'Julia',

  // Web markup and stylesheets
  'html': 'HTML',
  'css': 'CSS',
  'scss': 'SCSS',
  'sass': 'Sass',
  'less': 'Less',

  // Data and text formats
  'json': 'JSON',
  'yaml': 'YAML',
  'yml': 'YAML',
  'xml': 'XML',
  'csv': 'CSV',
  'tsv': 'TSV',
  'md': 'Markdown',

  // Configuration files
  'toml': 'TOML',
  'ini': 'INI',
  'conf': 'Config',
  'config': 'Config',

  // Shell and scripting
  'sh': 'Shell',
  'bash': 'Bash',
  'zsh': 'Zsh',
  'bat': 'Batch',
  'ps1': 'PowerShell',
};
// Coarse buckets used to group files of a similar kind.
// Each member maps an upper-case name to the lower-case string stored on file records.
export const FileCategory = {
  CODE: 'code',
  FIGURE: 'figure',
  DATA_TABLE: 'data_table',
  DOCUMENT: 'document',
  ARCHIVE: 'archive',
  BINARY: 'binary',
  MEDIA: 'media',
  OTHER: 'other',
};
// Category for each known extension (no leading dot).
// Keys are lowercase except the two R workspace spellings 'RData' and 'Rdata'.
const FILE_CATEGORIES = (() => {
  const table = {};
  // Register every extension in the list under the given category.
  const register = (category, extensions) => {
    for (const extension of extensions) {
      table[extension] = category;
    }
  };

  // Code files (includes notebooks, web assets and shell scripts)
  register(FileCategory.CODE, [
    'js', 'ts', 'jsx', 'tsx', 'py', 'rb', 'php', 'java', 'c', 'cpp', 'cs', 'go',
    'rs', 'swift', 'kt', 'scala', 'r', 'jl', 'ipynb',
    'html', 'css', 'scss', 'sass', 'less',
    'sh', 'bash', 'zsh', 'bat', 'ps1',
  ]);
  // Figure files
  register(FileCategory.FIGURE, [
    'png', 'jpg', 'jpeg', 'gif', 'bmp', 'svg', 'webp', 'ico', 'pdf',
  ]);
  // Data table files
  register(FileCategory.DATA_TABLE, [
    'csv', 'tsv', 'xlsx', 'xls', 'parquet', 'RData', 'Rdata', 'rda',
  ]);
  // Document files
  register(FileCategory.DOCUMENT, ['doc', 'docx', 'ppt', 'pptx', 'md', 'txt']);
  // Archive files
  register(FileCategory.ARCHIVE, ['zip', 'tar', 'gz', 'bz2', '7z', 'rar']);
  // Binary files
  register(FileCategory.BINARY, ['exe', 'dll', 'so', 'dylib', 'bin']);
  // Media files
  register(FileCategory.MEDIA, ['mp3', 'mp4', 'wav', 'avi', 'mov', 'mkv']);
  // Others (fonts)
  register(FileCategory.OTHER, ['ttf', 'woff', 'woff2', 'eot']);

  return table;
})();
// Lowercase extensions (no leading dot) whose contents must not be read as text.
// Lookups are expected to lowercase the extension first.
const BINARY_EXTENSIONS = new Set([
  // Images
  'png', 'jpg', 'jpeg', 'gif', 'bmp', 'ico', 'svg', 'webp',
  // Documents
  'pdf', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
  // Binary data tables: these are categorised as data tables in this file but
  // are not text, so they must be listed here to keep them from being decoded
  // as UTF-8.
  'parquet', 'rdata', 'rda',
  // Archives
  'zip', 'tar', 'gz', 'bz2', '7z', 'rar',
  // Binaries
  'exe', 'dll', 'so', 'dylib', 'bin',
  // Media
  'mp3', 'mp4', 'wav', 'avi', 'mov', 'mkv',
  // Others
  'ttf', 'woff', 'woff2', 'eot',
]);
/**
 * Decide, from the file name alone, whether a file may be read as text.
 *
 * The check is purely extension based: the extension is lowercased and looked
 * up in BINARY_EXTENSIONS. File contents are never inspected.
 *
 * @param {string} filename - File name or path.
 * @returns {boolean} false when the extension is a known binary one, true otherwise.
 */
function isTextFile(filename) {
  const dottedExtension = path.extname(filename);
  const normalized = dottedExtension.slice(1).toLowerCase();
  const isKnownBinary = BINARY_EXTENSIONS.has(normalized);
  return !isKnownBinary;
}
/**
 * Decides which root-relative paths are skipped while scanning a directory tree.
 *
 * Starts from a fixed list of default patterns and appends every non-empty,
 * non-comment line of `<directory>/.gitignore` when that file exists.
 *
 * Pattern handling is a pragmatic subset of the gitignore format:
 *  - `*` and `**` match any run of characters (including `/`); `?` matches one character.
 *  - `[abc]` / `[!abc]` character classes are supported.
 *  - A leading `/`, or a `/` in the middle, anchors the pattern to the root;
 *    otherwise the pattern may match at any depth.
 *  - A trailing `/` or `/**` is dropped, and every pattern matches the named
 *    entry itself as well as everything below it.
 *  - A leading `!` re-includes a path; the last matching pattern wins.
 *  - Every other character is matched literally.
 */
class IgnoreMatcher {
  /** Raw pattern strings. Callers may push additional patterns after construction. */
  patterns = [];

  /** Compiled rules keyed by raw pattern text, so each pattern is translated once. */
  #rules = new Map();

  /**
   * @param {string} directory - Directory whose `.gitignore` (if any) is loaded.
   */
  constructor(directory) {
    // Default ignore patterns
    this.patterns = [
      'node_modules/**',
      '.git/**',
      'dist/**',
      'build/**',
      'coverage/**'
    ];
    // Add patterns from .gitignore if it exists
    const gitignorePath = path.join(directory, '.gitignore');
    if (fs.existsSync(gitignorePath)) {
      try {
        const lines = fs.readFileSync(gitignorePath, 'utf8').split('\n');
        for (const rawLine of lines) {
          const line = rawLine.trim();
          if (line && !line.startsWith('#')) {
            this.patterns.push(line);
          }
        }
      } catch (error) {
        // Best effort: an unreadable .gitignore must not abort the scan.
        console.warn('Error reading .gitignore file:', error);
      }
    }
  }

  /**
   * Check if a path should be ignored.
   *
   * @param {string} filePath - Path relative to the scanned root.
   * @returns {boolean} true when the last pattern that matches is not a negation.
   */
  ignores(filePath) {
    // Patterns use '/', so convert platform separators before matching.
    const candidate = filePath.split(path.sep).join('/');
    let ignored = false;
    for (const pattern of this.patterns) {
      const rule = this.#ruleFor(pattern);
      if (rule.regex.test(candidate)) {
        ignored = !rule.negated;
      }
    }
    return ignored;
  }

  /** Return the compiled rule for a pattern, compiling and caching it on first use. */
  #ruleFor(pattern) {
    let rule = this.#rules.get(pattern);
    if (rule === undefined) {
      rule = IgnoreMatcher.#compile(pattern);
      this.#rules.set(pattern, rule);
    }
    return rule;
  }

  /** Translate one raw pattern into `{ negated, regex }`. */
  static #compile(pattern) {
    let glob = String(pattern);
    const negated = glob.startsWith('!');
    if (negated) {
      glob = glob.slice(1);
    }
    // A trailing slash only marks a directory; it does not anchor the pattern.
    if (glob.endsWith('/')) {
      glob = glob.slice(0, -1);
    }
    // Any remaining slash ties the pattern to the root directory.
    const rooted = glob.includes('/');
    if (glob.startsWith('/')) {
      glob = glob.slice(1);
    }
    // "dir/**" should cover "dir" itself so the directory is never entered.
    if (glob.endsWith('/**')) {
      glob = glob.slice(0, -3);
    }
    if (glob === '') {
      // Nothing left to match: use a regex that can never succeed.
      return { negated, regex: /(?!)/ };
    }
    const prefix = rooted ? '' : '(?:.*/)?';
    const suffix = '(?:/.*)?';
    let regex;
    try {
      regex = new RegExp(`^${prefix}${IgnoreMatcher.#translate(glob)}${suffix}$`);
    } catch {
      // e.g. an out-of-order character range: fall back to a literal match.
      regex = new RegExp(`^${prefix}${IgnoreMatcher.#escape(glob)}${suffix}$`);
    }
    return { negated, regex };
  }

  /** Convert glob syntax to regex source, escaping everything that is not a wildcard. */
  static #translate(glob) {
    let source = '';
    for (let i = 0; i < glob.length; i += 1) {
      const ch = glob[i];
      if (ch === '*') {
        if (glob.startsWith('**/', i)) {
          // "**/" also matches zero directories.
          source += '(?:.*/)?';
          i += 2;
        } else {
          source += '.*';
        }
      } else if (ch === '?') {
        source += '.';
      } else if (ch === '[') {
        const close = glob.indexOf(']', i + 2);
        if (close === -1) {
          // Unterminated class: treat the bracket literally.
          source += '\\[';
        } else {
          let members = glob.slice(i + 1, close);
          const inverted = members.startsWith('!') || members.startsWith('^');
          if (inverted) {
            members = members.slice(1);
          }
          source += `[${inverted ? '^' : ''}${members.replace(/[\\\]\[^]/g, '\\$&')}]`;
          i = close;
        }
      } else if (ch === '\\' && i + 1 < glob.length) {
        // Backslash makes the next character literal.
        i += 1;
        source += IgnoreMatcher.#escape(glob[i]);
      } else {
        source += IgnoreMatcher.#escape(ch);
      }
    }
    return source;
  }

  /** Escape every regex metacharacter in `text`. */
  static #escape(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }
}
/**
 * Resolve a display type for a file from its extension.
 *
 * The extension is lowercased and looked up in FILE_TYPES. When no entry is
 * found the bare extension is returned, and 'Unknown' when the path has no
 * extension at all.
 *
 * @param {string} filePath - File name or path.
 * @returns {string} Type label, bare extension, or 'Unknown'.
 */
function getFileType(filePath) {
  const ext = path.extname(filePath).slice(1).toLowerCase();
  const label = FILE_TYPES[ext];
  if (label) {
    return label;
  }
  return ext ? ext : 'Unknown';
}
/**
 * Resolve the category of a file from its extension.
 *
 * The lowercased extension is tried first, then the extension exactly as
 * written, because a few FILE_CATEGORIES keys are case-sensitive (e.g. 'RData').
 * Only the table's own keys count, so an extension that happens to be named
 * like an inherited Object property ('constructor', 'toString', ...) falls
 * through to FileCategory.OTHER instead of returning that inherited member.
 *
 * @param {string} filePath - File name or path.
 * @returns {string} A FileCategory value; FileCategory.OTHER when unmapped.
 */
function getFileCategory(filePath) {
  const rawExtension = path.extname(filePath).slice(1);
  const candidates = [rawExtension.toLowerCase(), rawExtension];
  for (const candidate of candidates) {
    if (Object.hasOwn(FILE_CATEGORIES, candidate)) {
      return FILE_CATEGORIES[candidate];
    }
  }
  return FileCategory.OTHER;
}
/**
 * Read at most `byteCount` bytes from the start of a file.
 * The descriptor is always closed, even when a read fails.
 */
function readLeadingBytes(filePath, byteCount) {
  const buffer = Buffer.alloc(byteCount);
  const fd = fs.openSync(filePath, 'r');
  try {
    let filled = 0;
    while (filled < byteCount) {
      const bytesRead = fs.readSync(fd, buffer, filled, byteCount - filled, null);
      if (bytesRead === 0) {
        break; // end of file
      }
      filled += bytesRead;
    }
    return buffer.subarray(0, filled);
  } finally {
    fs.closeSync(fd);
  }
}
/**
 * Get a preview of the file content: its first `maxLength` characters.
 *
 * Files rejected by isTextFile get no preview. Small files are read whole;
 * for larger ones only a bounded head is read, so a multi-gigabyte text file
 * is never loaded into memory just to keep a short prefix.
 *
 * @param {string} filePath - Path of the file to preview.
 * @param {number} [maxLength=1000] - Maximum number of characters (UTF-16 code units) kept.
 * @returns {string|undefined} The whole content when it fits, the first
 *   `maxLength` characters followed by '...' when it does not, or undefined
 *   when the file is not text or cannot be read.
 */
function getFilePreview(filePath, maxLength = 1000) {
  if (!isTextFile(filePath)) {
    return undefined;
  }
  try {
    // UTF-8 needs at most 3 bytes per UTF-16 code unit, so 4 bytes per
    // requested character (+1) always decodes to more than maxLength
    // characters when the file is larger than the budget.
    const byteBudget = Math.ceil(maxLength * 4 + 1);
    const readWholeFile = !(byteBudget >= 1)
      || !Number.isFinite(byteBudget)
      || fs.statSync(filePath).size <= byteBudget;
    const content = readWholeFile
      ? fs.readFileSync(filePath, 'utf8')
      : readLeadingBytes(filePath, byteBudget).toString('utf8');
    if (content.length <= maxLength) {
      return content;
    }
    return content.slice(0, maxLength) + '...';
  } catch {
    // Best effort: an unreadable file simply has no preview.
    return undefined;
  }
}
/**
 * Build the metadata record for one file.
 *
 * @param {string} filePath - Path of the file on disk.
 * @param {string} rootPath - Scan root; the record's `path` is relative to it.
 * @returns {object} Record with, in this order: path, name, extension
 *   (as written, without the dot), size in bytes, lastModified (mtime),
 *   preview, type and category.
 */
function analyzeFile(filePath, rootPath) {
  const { size, mtime } = fs.statSync(filePath);
  const fileInfo = {
    path: path.relative(rootPath, filePath),
    name: path.basename(filePath),
    extension: path.extname(filePath).slice(1),
    size,
    lastModified: mtime,
  };
  // Derived fields are appended afterwards so the key order stays the same.
  fileInfo.preview = getFilePreview(filePath);
  fileInfo.type = getFileType(filePath);
  fileInfo.category = getFileCategory(filePath);
  return fileInfo;
}
/**
 * Recursively analyze a directory.
 *
 * Entries for which `ignoreMatcher.ignores(relativePath)` is true are skipped
 * before they are touched. Sub-directories are only descended into while
 * `currentDepth < maxDepth`; deeper directories are left out of the result.
 * Entries that are neither regular files nor directories are skipped.
 *
 * An entry that cannot be inspected (for example a dangling symlink, which
 * makes `fs.statSync` fail, or a directory without read permission) is
 * reported with a warning and skipped instead of aborting the whole scan.
 * Failing to list `directoryPath` itself still throws.
 *
 * @param {string} directoryPath - Directory to list.
 * @param {string} rootPath - Scan root used to compute relative paths.
 * @param {{ignores: function(string): boolean}} ignoreMatcher - Ignore filter.
 * @param {number} [currentDepth=0] - Depth of `directoryPath` below the root.
 * @param {number} [maxDepth=3] - Deepest level whose sub-directories are still listed.
 * @returns {{path: string, name: string, files: object[], subdirectories: object[], depth: number}}
 */
function analyzeDirectory(directoryPath, rootPath, ignoreMatcher, currentDepth = 0, maxDepth = 3) {
  const relativePath = path.relative(rootPath, directoryPath);
  const files = [];
  const subdirectories = [];
  for (const item of fs.readdirSync(directoryPath)) {
    const itemPath = path.join(directoryPath, item);
    // Skip if item is ignored
    if (ignoreMatcher.ignores(path.relative(rootPath, itemPath))) {
      continue;
    }
    try {
      const stats = fs.statSync(itemPath);
      if (stats.isDirectory()) {
        if (currentDepth < maxDepth) {
          subdirectories.push(analyzeDirectory(itemPath, rootPath, ignoreMatcher, currentDepth + 1, maxDepth));
        }
      } else if (stats.isFile()) {
        files.push(analyzeFile(itemPath, rootPath));
      }
    } catch (error) {
      // Only file-system failures are tolerated; anything else is a real bug.
      const isFileSystemError = error instanceof Error
        && typeof error.code === 'string'
        && typeof error.syscall === 'string';
      if (!isFileSystemError) {
        throw error;
      }
      console.warn(`Skipping unreadable entry ${itemPath}: ${error.message}`);
    }
  }
  return {
    path: relativePath || '.',
    name: path.basename(directoryPath),
    files,
    subdirectories,
    depth: currentDepth
  };
}
/**
 * Collect all files from a directory structure into one flat array.
 *
 * Order is depth-first: a directory's own files come first, followed by the
 * files of each sub-directory in listing order. The walk uses an explicit
 * stack and appends files one at a time, so neither very deep trees nor very
 * large directories can exhaust the call stack (spreading a huge array into
 * `push(...)` throws a RangeError).
 *
 * @param {{files: object[], subdirectories: object[]}} structure - Tree as
 *   produced by analyzeDirectory.
 * @returns {object[]} New array containing every file record in the tree.
 */
function collectAllFiles(structure) {
  const allFiles = [];
  const pending = [structure];
  while (pending.length > 0) {
    const directory = pending.pop();
    for (const file of directory.files) {
      allFiles.push(file);
    }
    // Push in reverse so the first sub-directory is processed next.
    for (let i = directory.subdirectories.length - 1; i >= 0; i -= 1) {
      pending.push(directory.subdirectories[i]);
    }
  }
  return allFiles;
}
/**
 * Bucket items by a derived key, keeping first-seen key order.
 * A Map is used so keys such as 'constructor' or '__proto__' cannot collide
 * with members inherited from Object.prototype.
 */
function groupByKey(items, keyOf) {
  const buckets = new Map();
  for (const item of items) {
    const key = String(keyOf(item));
    const bucket = buckets.get(key);
    if (bucket) {
      bucket.push(item);
    } else {
      buckets.set(key, [item]);
    }
  }
  return buckets;
}
/**
 * Analyze repository and generate a comprehensive data structure.
 *
 * The directory tree is scanned first (honouring the ignore patterns and
 * `maxDepth`); `includePattern` and `category` then narrow the flat file list
 * that feeds the groupings, totals and statistics. The returned `structure`
 * is the unfiltered scan result.
 *
 * NOTE(review): `options.respectGitignore` is accepted but is not consulted
 * anywhere in this function; the ignore matcher is always built the same way.
 * TODO: wire the option through to the matcher.
 *
 * @param {object} options
 * @param {string} options.directory - Directory to analyze.
 * @param {number} [options.maxDepth=3] - Passed to the directory scan.
 * @param {boolean} [options.respectGitignore=true] - Currently has no effect (see NOTE).
 * @param {string} [options.includePattern] - Glob, relative to `directory`;
 *   only files it matches are kept.
 * @param {string} [options.excludePattern] - Extra ignore pattern for the scan.
 * @param {string} [options.category] - Keep only files of this category.
 * @returns {Promise<object>} rootPath, name, structure, filesByType,
 *   filesByCategory, totalFiles, totalSize and statistics
 *   (fileTypeDistribution, fileCategoryDistribution, largestFiles,
 *   recentlyModifiedFiles; the last two hold at most 10 entries).
 * @throws {Error} When `directory` does not exist.
 */
export async function analyzeRepository(options) {
  const { directory, maxDepth = 3, includePattern, excludePattern, category } = options;
  // Ensure directory exists
  if (!fs.existsSync(directory)) {
    throw new Error(`Directory not found: ${directory}`);
  }
  // Create ignore matcher and add the custom exclude pattern if provided
  const ignoreMatcher = new IgnoreMatcher(directory);
  if (excludePattern) {
    ignoreMatcher.patterns.push(excludePattern);
  }
  // Analyze directory structure
  const structure = analyzeDirectory(directory, directory, ignoreMatcher, 0, maxDepth);
  // Get all files from structure
  let allFiles = collectAllFiles(structure);
  // Apply include pattern if provided
  if (includePattern) {
    // A Set keeps the filter linear in the number of files.
    const includedPaths = new Set(await glob(includePattern, { cwd: directory }));
    allFiles = allFiles.filter((file) => includedPaths.has(file.path));
  }
  // Filter by category if provided
  if (category) {
    allFiles = allFiles.filter((file) => file.category === category);
  }
  // Group files by type and by category; each distribution is the bucket size.
  const typeBuckets = groupByKey(allFiles, (file) => file.type);
  const categoryBuckets = groupByKey(allFiles, (file) => file.category);
  const toCounts = (buckets) => Object.fromEntries(
    [...buckets].map(([key, bucket]) => [key, bucket.length])
  );
  const filesByType = Object.fromEntries(typeBuckets);
  const fileTypeDistribution = toCounts(typeBuckets);
  const filesByCategory = Object.fromEntries(categoryBuckets);
  const fileCategoryDistribution = toCounts(categoryBuckets);
  // Calculate total size
  const totalSize = allFiles.reduce((sum, file) => sum + file.size, 0);
  // Get largest files
  const largestFiles = [...allFiles].sort((a, b) => b.size - a.size).slice(0, 10);
  // Get recently modified files
  const recentlyModifiedFiles = [...allFiles]
    .sort((a, b) => b.lastModified.getTime() - a.lastModified.getTime())
    .slice(0, 10);
  return {
    rootPath: directory,
    name: path.basename(directory),
    structure,
    filesByType,
    filesByCategory,
    totalFiles: allFiles.length,
    totalSize,
    statistics: {
      fileTypeDistribution,
      fileCategoryDistribution,
      largestFiles,
      recentlyModifiedFiles
    }
  };
}
//# sourceMappingURL=analyzer.js.map