UNPKG

remcode

Version:

Turn your AI assistant into a codebase expert. Intelligent code analysis, semantic search, and software engineering guidance through MCP integration.

387 lines (386 loc) 15.5 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.FileAnalyzer = void 0; const logger_1 = require("../utils/logger"); const fs = __importStar(require("fs")); const path = __importStar(require("path")); const logger = (0, logger_1.getLogger)('FileAnalyzer'); class FileAnalyzer { constructor(repoPath = process.cwd(), options = {}) { this.repoPath = repoPath; this.maxFileSize = options.maxFileSizeBytes || 1024 * 1024; // Default 1MB } /** * Analyze a list of changed files to determine their characteristics */ async analyzeChangedFiles(changes, options = {}) { logger.info(`Analyzing ${changes.length} changed files`); const opts = { basePath: this.repoPath, skipAST: options.skipAST || false, skipDependencies: options.skipDependencies || false, maxFileSizeBytes: options.maxFileSizeBytes || this.maxFileSize }; // Filter out deleted files const activeChanges = changes.filter(change => change.status !== 'deleted'); // Process in batches to avoid memory issues with large repos const batchSize = 20; const results = []; for (let i = 0; i < activeChanges.length; i += batchSize) { const batch = activeChanges.slice(i, i + batchSize); logger.debug(`Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(activeChanges.length / batchSize)}`); // Process files in parallel within each batch const batchResults = await Promise.all(batch.map(change => this.analyzeFile(change.path, opts))); results.push(...batchResults); } logger.info(`Completed analysis of ${results.length} files`); return results; } /** * Analyze a single file to determine its characteristics */ async analyzeFile(filePath, options) { const absolutePath = path.isAbsolute(filePath) ? filePath : path.join(options.basePath, filePath); const relativePath = path.isAbsolute(filePath) ? path.relative(options.basePath, filePath) : filePath; logger.debug(`Analyzing file: ${relativePath}`); // Basic file info const language = this.detectLanguage(relativePath); const category = this.categorizeFile(relativePath); // Default analysis with minimal info const analysis = { path: relativePath, category, language, complexity: 'low', size: 0, chunkingStrategy: this.determineChunkingStrategy(relativePath, language) }; try { // Check if file exists and get its stats if (!fs.existsSync(absolutePath)) { logger.warn(`File not found: ${absolutePath}`); return analysis; } const stats = fs.statSync(absolutePath); analysis.size = stats.size; // Skip very large files to avoid memory issues if (stats.size > (options.maxFileSizeBytes || this.maxFileSize)) { logger.warn(`Skipping detailed analysis of large file (${stats.size} bytes): ${relativePath}`); analysis.complexity = 'high'; // Assume large files are complex return analysis; } // Read file content const content = fs.readFileSync(absolutePath, 'utf8'); // Count source lines of code (excluding comments and blank lines) analysis.sloc = this.countSourceLines(content, language); // Set complexity based on SLOC analysis.complexity = this.determineComplexity(analysis.sloc, language); // Skip AST parsing if requested if (options.skipAST) { return analysis; } // Additional language-specific analysis if (language === 'typescript' || language === 'javascript') { this.analyzeJsTs(content, analysis, language); } else if (language === 'python') { this.analyzePython(content, analysis); } // Determine chunking strategy based on detailed analysis analysis.chunkingStrategy = this.refineChunkingStrategy(analysis); } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error); logger.error(`Error analyzing file ${relativePath}: ${errorMsg}`); // Return basic analysis with what we have } return analysis; } /** * Analyze JavaScript/TypeScript code */ analyzeJsTs(content, analysis, language) { try { // Using regex-based analysis instead of full AST parsing for simplicity // Extract imports, exports, functions, and classes analysis.imports = []; analysis.exports = []; analysis.functions = []; analysis.classes = []; // Simple extraction of imports (without full AST traversal) const importRegex = /import\s+(?:(?:\{[^\}]*\}|\*\s+as\s+[\w$]+|[\w$]+)\s+from\s+)?['"]([^'"]+)['"];?/g; let match; while ((match = importRegex.exec(content)) !== null) { if (match[1]) analysis.imports.push(match[1]); } // Simple extraction of exports (without full AST traversal) const exportRegex = /export\s+(?:(?:default\s+)?(?:class|function|const|let|var)\s+([\w$]+))/g; while ((match = exportRegex.exec(content)) !== null) { if (match[1]) analysis.exports.push(match[1]); } // Simple extraction of functions and classes (without full AST traversal) const functionRegex = /(?:function\s+([\w$]+)|(?:const|let|var)\s+([\w$]+)\s*=\s*(?:function|\([^)]*\)\s*=>))/g; while ((match = functionRegex.exec(content)) !== null) { const name = match[1] || match[2]; if (name) analysis.functions.push(name); } const classRegex = /class\s+([\w$]+)/g; while ((match = classRegex.exec(content)) !== null) { if (match[1]) analysis.classes.push(match[1]); } } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error); logger.warn(`Error parsing ${language} code: ${errorMsg}`); } } /** * Analyze Python code */ analyzePython(content, analysis) { try { // Simple regex-based analysis for Python (without full AST parsing) analysis.imports = []; analysis.functions = []; analysis.classes = []; // Extract imports const importRegex = /(?:from\s+([\w.]+)\s+import|import\s+([\w.]+))/g; let match; while ((match = importRegex.exec(content)) !== null) { const importName = match[1] || match[2]; if (importName) analysis.imports.push(importName); } // Extract functions const functionRegex = /def\s+([\w_]+)\s*\(/g; while ((match = functionRegex.exec(content)) !== null) { if (match[1]) analysis.functions.push(match[1]); } // Extract classes const classRegex = /class\s+([\w_]+)\s*(?:\([^)]*\))?:/g; while ((match = classRegex.exec(content)) !== null) { if (match[1]) analysis.classes.push(match[1]); } } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error); logger.warn(`Error analyzing Python code: ${errorMsg}`); } } /** * Count source lines of code (excluding comments and blank lines) */ countSourceLines(content, language) { // Remove comments based on language let noComments = content; if (language === 'typescript' || language === 'javascript') { // Remove JS/TS comments noComments = noComments .replace(/\/\/.*$/gm, '') // Single line comments .replace(/\/\*[\s\S]*?\*\//g, ''); // Multi-line comments } else if (language === 'python') { // Remove Python comments noComments = noComments .replace(/#.*$/gm, '') // Single line comments .replace(/'''[\s\S]*?'''/g, '') // Triple single quote docstrings .replace(/"""[\s\S]*?"""/g, ''); // Triple double quote docstrings } // Split into lines and count non-empty lines return noComments.split('\n') .filter(line => line.trim().length > 0) .length; } /** * Determine the complexity of a file based on its characteristics */ determineComplexity(sloc, language) { // Simple complexity determination based on SLOC if (sloc < 100) return 'low'; if (sloc < 500) return 'medium'; return 'high'; } /** * Determine the initial chunking strategy based on file path and language */ determineChunkingStrategy(filePath, language) { const filename = path.basename(filePath).toLowerCase(); // Special case for specific file types if (filename === 'package.json' || filename === 'tsconfig.json' || filename.endsWith('.config.js')) { return 'file_level'; } // Language-specific defaults if (language === 'typescript' || language === 'javascript') { return 'function_level'; } else if (language === 'python') { return 'function_level'; } return 'sliding_window'; } /** * Refine the chunking strategy based on the detailed analysis */ refineChunkingStrategy(analysis) { // Start with the initial strategy let strategy = analysis.chunkingStrategy; // If file is very small, use file_level if (analysis.size < 1000 || (analysis.sloc && analysis.sloc < 50)) { return 'file_level'; } // If file has many classes but few functions, use class_level if (analysis.classes && analysis.classes.length > 1 && (!analysis.functions || analysis.functions.length / analysis.classes.length < 5)) { return 'class_level'; } // If file is very complex, use hybrid approach if (analysis.complexity === 'high') { return 'sliding_window_with_overlap'; } return strategy; } /** * Detect the programming language of a file based on its extension */ detectLanguage(filePath) { const ext = path.extname(filePath).toLowerCase(); const langMap = { // TypeScript/JavaScript '.ts': 'typescript', '.tsx': 'typescript', '.js': 'javascript', '.jsx': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript', // Python '.py': 'python', '.pyi': 'python', '.pyx': 'python', // Java and JVM '.java': 'java', '.kt': 'kotlin', '.groovy': 'groovy', '.scala': 'scala', // C-family '.c': 'c', '.cpp': 'cpp', '.cc': 'cpp', '.h': 'c', '.hpp': 'cpp', // C# '.cs': 'csharp', // Go '.go': 'go', // Rust '.rs': 'rust', // Swift '.swift': 'swift', // Web '.html': 'html', '.htm': 'html', '.css': 'css', '.scss': 'scss', '.sass': 'sass', '.less': 'less', // PHP '.php': 'php', // Ruby '.rb': 'ruby', // Shell '.sh': 'shell', '.bash': 'shell', '.zsh': 'shell', // Data formats '.json': 'json', '.yml': 'yaml', '.yaml': 'yaml', '.xml': 'xml', // Markdown and docs '.md': 'markdown', '.mdx': 'markdown', '.rst': 'restructuredtext', }; return langMap[ext] || 'unknown'; } /** * Categorize a file based on its path and name */ categorizeFile(filePath) { const filename = path.basename(filePath).toLowerCase(); const dirPath = path.dirname(filePath).toLowerCase(); // Test files if (filename.includes('test') || filename.includes('spec') || dirPath.includes('test') || dirPath.includes('spec') || dirPath.includes('__tests__')) { return 'test'; } // Config files if (filename.includes('config') || filename.includes('setup') || filename.endsWith('.json') || filename.endsWith('.yml') || filename.endsWith('.yaml') || filename.endsWith('.ini') || filename === '.env' || filename.startsWith('.') || dirPath.includes('config')) { return 'config'; } // Priority files (entry points, core modules) if (filename === 'index.ts' || filename === 'index.js' || filename === 'main.ts' || filename === 'main.js' || filename === 'app.ts' || filename === 'app.js' || dirPath.includes('core') || dirPath.includes('main')) { return 'priority'; } // Files to ignore if (filename.endsWith('.min.js') || filename.endsWith('.d.ts') || dirPath.includes('node_modules') || dirPath.includes('dist') || dirPath.includes('build')) { return 'ignore'; } return 'normal'; } } exports.FileAnalyzer = FileAnalyzer;