
remcode


Turn your AI assistant into a codebase expert. Intelligent code analysis, semantic search, and software engineering guidance through MCP integration.

"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.analyzeCommand = analyzeCommand; const chalk_1 = __importDefault(require("chalk")); const ora_1 = __importDefault(require("ora")); const fs = __importStar(require("fs")); const path = __importStar(require("path")); const glob = __importStar(require("glob")); const source_1 = require("../utils/source"); const config_1 = require("../utils/config"); const logger_1 = require("../utils/logger"); const logger = (0, logger_1.getLogger)('AnalyzeCommand'); // Simple language detection mapping const LANGUAGE_EXTENSIONS = { javascript: ['.js', '.jsx'], typescript: ['.ts', '.tsx'], python: ['.py'], java: ['.java'], kotlin: ['.kt'], swift: ['.swift'], ruby: ['.rb'], go: ['.go'], rust: ['.rs'], csharp: ['.cs'], cpp: ['.cpp', '.cc', '.cxx', '.h', '.hpp'], php: ['.php'], html: ['.html', '.htm'], css: ['.css', '.scss', '.sass', '.less'], json: ['.json'], markdown: ['.md', '.markdown'], yaml: ['.yml', '.yaml'], shell: ['.sh', '.bash', '.zsh'], sql: ['.sql'], xml: ['.xml'], }; function analyzeCommand(program) { program .command('analyze') .description('Analyze a codebase structure and provide vectorization recommendations') .argument('<source>', 'Source codebase (GitHub URL or local path)') .option('-o, --output <path>', 'Output path for analysis report', './codebase_analysis.json') .option('-c, --config <path>', 'Path to config file') .option('-i, --ignore <patterns>', 'Patterns to ignore') .option('-t, --token <token>', 'GitHub token (if source is a GitHub repo)') .option('-v, --verbose', 'Enable verbose output') .option('--cache <path>', 'Path to cache directory', './.remcode-cache') .action(async (source, options) => { const spinner = (0, ora_1.default)('Preparing analysis').start(); try { // Enable verbose logging if requested if (options.verbose) { logger.info('Verbose logging enabled'); } logger.info(`Starting analysis of ${source}`); // Load configuration const config = (0, config_1.loadConfig)(options.config); logger.debug('Configuration loaded', config); // Resolve the source (GitHub or local) spinner.text = 'Resolving source repository'; const resolvedSource = await (0, source_1.resolveSource)(source, { 
token: options.token || process.env.GITHUB_TOKEN, cache: options.cache }); logger.info(`Source resolved to ${resolvedSource.path}`); // Get ignore patterns const ignorePatterns = options.ignore ? options.ignore.split(',') : config.ignore || ['node_modules', 'dist', 'build', '.git', 'coverage', '.next', 'target', '__pycache__']; // Find all files spinner.text = 'Scanning repository files'; const allFiles = await glob.glob('**/*', { cwd: resolvedSource.path, absolute: true, nodir: true, ignore: ignorePatterns.map((pattern) => `**/${pattern}/**`) }); logger.info(`Found ${allFiles.length} files in repository`); // Analyze files spinner.text = 'Analyzing file structure and languages'; const analysis = await analyzeRepository(resolvedSource.path, allFiles); // Create output directory if it doesn't exist spinner.text = 'Saving analysis report'; const outputDir = path.dirname(options.output); if (!fs.existsSync(outputDir)) { fs.mkdirSync(outputDir, { recursive: true }); } // Write analysis to file fs.writeFileSync(options.output, JSON.stringify(analysis, null, 2)); // Generate a summary report for the console const summary = ` ${chalk_1.default.bold(chalk_1.default.blue('Remcode Analysis Summary'))} ${chalk_1.default.bold('Repository Information:')} Name: ${chalk_1.default.cyan(analysis.repository_info.name)} Total Files: ${chalk_1.default.cyan(analysis.summary.total_files)} Code Files: ${chalk_1.default.cyan(analysis.summary.code_files)} Languages: ${chalk_1.default.cyan(analysis.summary.languages_detected)} ${chalk_1.default.bold('Top Languages:')} ${Object.entries(analysis.language_breakdown) .sort((a, b) => b[1].files - a[1].files) .slice(0, 5) .map(([lang, stats]) => ` ${chalk_1.default.yellow(lang)}: ${chalk_1.default.cyan(stats.files)} files (${chalk_1.default.green(stats.percentage)}%)`) .join('\n')} ${chalk_1.default.bold('Vectorization Strategy:')} Estimated Chunks: ${chalk_1.default.cyan(analysis.vectorization_recommendations.estimated_chunks)} Primary Model: ${chalk_1.default.yellow(analysis.vectorization_recommendations.embedding_models.primary)} Priority Files: ${chalk_1.default.cyan(analysis.vectorization_recommendations.priority_files.length)} `; spinner.succeed(`Analysis complete. Report saved to ${chalk_1.default.cyan(options.output)}`); console.log(summary); // Log next steps console.log(chalk_1.default.green('\nNext steps:')); console.log(` 1. Run ${chalk_1.default.cyan(`remcode vectorize ${source} --analysis ${options.output}`)} to vectorize this codebase`); console.log(` 2. Or explore the analysis report at ${chalk_1.default.cyan(options.output)}\n`); logger.info('Analysis command completed successfully'); } catch (error) { spinner.fail('Analysis failed'); logger.error('Analysis failed with error', error instanceof Error ? error : undefined); console.error(chalk_1.default.red(error instanceof Error ? 
error.message : 'Unknown error')); if (error instanceof Error && error.stack && options.verbose) { console.error(chalk_1.default.gray(error.stack)); } console.log(chalk_1.default.yellow('\nTroubleshooting tips:')); console.log(' - Check your internet connection if using a GitHub repository'); console.log(' - Ensure you have the necessary permissions to access the repository'); console.log(' - Run with --verbose for more detailed error information'); process.exit(1); } }); } async function analyzeRepository(repoPath, allFiles) { const repoName = path.basename(repoPath); // Create extension to language map const extensionToLanguage = new Map(); for (const [language, extensions] of Object.entries(LANGUAGE_EXTENSIONS)) { for (const ext of extensions) { extensionToLanguage.set(ext, language); } } // Analyze language distribution const languageStats = {}; let totalLines = 0; let codeFiles = 0; let largestFile = ''; let largestFileLines = 0; // Process files in batches to avoid memory issues const batchSize = 50; for (let i = 0; i < allFiles.length; i += batchSize) { const batch = allFiles.slice(i, i + batchSize); await Promise.all(batch.map(async (file) => { try { const extension = path.extname(file).toLowerCase(); const language = extensionToLanguage.get(extension) || 'other'; if (!languageStats[language]) { languageStats[language] = { files: 0, lines: 0 }; } languageStats[language].files++; // Only count lines for known code files to avoid binary files if (language !== 'other') { codeFiles++; const content = await fs.promises.readFile(file, 'utf-8'); const lineCount = content.split('\n').length; languageStats[language].lines += lineCount; totalLines += lineCount; // Track largest file if (lineCount > largestFileLines) { largestFileLines = lineCount; largestFile = path.relative(repoPath, file); } } } catch (error) { // Skip files that can't be read (likely binary) logger.debug(`Skipping file ${file}: ${error instanceof Error ? error.message : 'Unknown error'}`); } })); } // Calculate percentages const languageBreakdown = {}; for (const [language, stats] of Object.entries(languageStats)) { languageBreakdown[language] = { files: stats.files, lines: stats.lines, percentage: totalLines > 0 ? Math.round((stats.lines / totalLines) * 100) : 0 }; } // Estimate chunks based on code lines (assuming ~50 lines per chunk on average) const estimatedChunks = Math.ceil(totalLines / 50); // Identify priority files (large files that should be chunked) const priorityFiles = allFiles .filter(file => { const extension = path.extname(file).toLowerCase(); return extensionToLanguage.has(extension); }) .map(file => ({ path: path.relative(repoPath, file), size: fs.statSync(file).size })) .sort((a, b) => b.size - a.size) .slice(0, 10) .map(f => f.path); return { repository_info: { name: repoName, total_files: allFiles.length, total_lines: totalLines, analyzed_at: new Date().toISOString(), analysis_version: '1.0.0-simplified' }, language_breakdown: languageBreakdown, summary: { total_files: allFiles.length, code_files: codeFiles, languages_detected: Object.keys(languageBreakdown).length, largest_file: largestFile, largest_file_lines: largestFileLines }, vectorization_recommendations: { chunking_strategy: { clean_modules: "module_level", complex_modules: "file_level", monolithic_files: "sliding_window_with_high_overlap" }, embedding_models: { primary: "microsoft/graphcodebert-base", fallback: "microsoft/codebert-base" }, estimated_chunks: estimatedChunks, priority_files: priorityFiles } }; }
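
Usage sketch: the exported analyzeCommand registers an `analyze` subcommand on a CLI program object. The chained .command()/.argument()/.option()/.action() calls match the commander API, so the snippet below assumes commander is the underlying CLI framework (the file itself does not name it), and the require path to this module is hypothetical.

// Hypothetical CLI entry point wiring up the analyze command.
// Assumes commander as the CLI framework and './commands/analyze'
// as the path to this module; both are assumptions, not confirmed
// by this file.
const { Command } = require('commander');
const { analyzeCommand } = require('./commands/analyze');

const program = new Command();
program.name('remcode'); // CLI name taken from the "Next steps" output above
analyzeCommand(program); // registers `remcode analyze <source> [options]`
program.parse(process.argv);

// Example invocation (writes the report to ./analysis.json):
//   remcode analyze https://github.com/user/repo --output ./analysis.json --verbose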