remcode
Turn your AI assistant into a codebase expert. Intelligent code analysis, semantic search, and software engineering guidance through MCP integration.
JavaScript
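// Example invocation of the command defined in this file (a sketch; the
// flags correspond to the `analyze` options registered below):
//   remcode analyze ./my-repo --output ./codebase_analysis.json --verbose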
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.analyzeCommand = analyzeCommand;
const chalk_1 = __importDefault(require("chalk"));
const ora_1 = __importDefault(require("ora"));
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const glob = __importStar(require("glob"));
const source_1 = require("../utils/source");
const config_1 = require("../utils/config");
const logger_1 = require("../utils/logger");
const logger = (0, logger_1.getLogger)('AnalyzeCommand');
// Simple language detection mapping
const LANGUAGE_EXTENSIONS = {
javascript: ['.js', '.jsx'],
typescript: ['.ts', '.tsx'],
python: ['.py'],
java: ['.java'],
kotlin: ['.kt'],
swift: ['.swift'],
ruby: ['.rb'],
go: ['.go'],
rust: ['.rs'],
csharp: ['.cs'],
cpp: ['.cpp', '.cc', '.cxx', '.h', '.hpp'],
php: ['.php'],
html: ['.html', '.htm'],
css: ['.css', '.scss', '.sass', '.less'],
json: ['.json'],
markdown: ['.md', '.markdown'],
yaml: ['.yml', '.yaml'],
shell: ['.sh', '.bash', '.zsh'],
sql: ['.sql'],
xml: ['.xml'],
};
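// Lookup example: '.tsx' resolves to 'typescript' via the reverse map built
// in analyzeRepository() below; extensions not listed here fall into the
// 'other' bucket and are skipped for line counting.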
function analyzeCommand(program) {
program
.command('analyze')
.description('Analyze a codebase structure and provide vectorization recommendations')
.argument('<source>', 'Source codebase (GitHub URL or local path)')
.option('-o, --output <path>', 'Output path for analysis report', './codebase_analysis.json')
.option('-c, --config <path>', 'Path to config file')
.option('-i, --ignore <patterns>', 'Patterns to ignore')
.option('-t, --token <token>', 'GitHub token (if source is a GitHub repo)')
.option('-v, --verbose', 'Enable verbose output')
.option('--cache <path>', 'Path to cache directory', './.remcode-cache')
.action(async (source, options) => {
const spinner = (0, ora_1.default)('Preparing analysis').start();
try {
// Verbose flag: acknowledged here, and used below to include stack traces on failure
if (options.verbose) {
logger.info('Verbose logging enabled');
}
logger.info(`Starting analysis of ${source}`);
// Load configuration
const config = (0, config_1.loadConfig)(options.config);
logger.debug('Configuration loaded', config);
// Resolve the source (GitHub or local)
spinner.text = 'Resolving source repository';
const resolvedSource = await (0, source_1.resolveSource)(source, {
token: options.token || process.env.GITHUB_TOKEN,
cache: options.cache
});
logger.info(`Source resolved to ${resolvedSource.path}`);
// Get ignore patterns
const ignorePatterns = options.ignore ?
options.ignore.split(',') :
config.ignore || ['node_modules', 'dist', 'build', '.git', 'coverage', '.next', 'target', '__pycache__'];
// Find all files
spinner.text = 'Scanning repository files';
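// Each ignore entry (e.g. 'node_modules') is wrapped as '**/<pattern>/**'
// below, so matching directories are excluded at any depth in the tree.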
const allFiles = await glob.glob('**/*', {
cwd: resolvedSource.path,
absolute: true,
nodir: true,
ignore: ignorePatterns.map((pattern) => `**/${pattern}/**`)
});
logger.info(`Found ${allFiles.length} files in repository`);
// Analyze files
spinner.text = 'Analyzing file structure and languages';
const analysis = await analyzeRepository(resolvedSource.path, allFiles);
// Create output directory if it doesn't exist
spinner.text = 'Saving analysis report';
const outputDir = path.dirname(options.output);
if (!fs.existsSync(outputDir)) {
fs.mkdirSync(outputDir, { recursive: true });
}
// Write analysis to file
fs.writeFileSync(options.output, JSON.stringify(analysis, null, 2));
// Generate a summary report for the console
const summary = `
${chalk_1.default.bold(chalk_1.default.blue('Remcode Analysis Summary'))}
${chalk_1.default.bold('Repository Information:')}
Name: ${chalk_1.default.cyan(analysis.repository_info.name)}
Total Files: ${chalk_1.default.cyan(analysis.summary.total_files)}
Code Files: ${chalk_1.default.cyan(analysis.summary.code_files)}
Languages: ${chalk_1.default.cyan(analysis.summary.languages_detected)}
${chalk_1.default.bold('Top Languages:')}
${Object.entries(analysis.language_breakdown)
.sort((a, b) => b[1].files - a[1].files)
.slice(0, 5)
.map(([lang, stats]) => ` ${chalk_1.default.yellow(lang)}: ${chalk_1.default.cyan(stats.files)} files (${chalk_1.default.green(stats.percentage)}%)`)
.join('\n')}
${chalk_1.default.bold('Vectorization Strategy:')}
Estimated Chunks: ${chalk_1.default.cyan(analysis.vectorization_recommendations.estimated_chunks)}
Primary Model: ${chalk_1.default.yellow(analysis.vectorization_recommendations.embedding_models.primary)}
Priority Files: ${chalk_1.default.cyan(analysis.vectorization_recommendations.priority_files.length)}
`;
spinner.succeed(`Analysis complete. Report saved to ${chalk_1.default.cyan(options.output)}`);
console.log(summary);
// Log next steps
console.log(chalk_1.default.green('\nNext steps:'));
console.log(` 1. Run ${chalk_1.default.cyan(`remcode vectorize ${source} --analysis ${options.output}`)} to vectorize this codebase`);
console.log(` 2. Or explore the analysis report at ${chalk_1.default.cyan(options.output)}\n`);
logger.info('Analysis command completed successfully');
}
catch (error) {
spinner.fail('Analysis failed');
logger.error('Analysis failed with error', error instanceof Error ? error : undefined);
console.error(chalk_1.default.red(error instanceof Error ? error.message : 'Unknown error'));
if (error instanceof Error && error.stack && options.verbose) {
console.error(chalk_1.default.gray(error.stack));
}
console.log(chalk_1.default.yellow('\nTroubleshooting tips:'));
console.log(' - Check your internet connection if using a GitHub repository');
console.log(' - Ensure you have the necessary permissions to access the repository');
console.log(' - Run with --verbose for more detailed error information');
process.exit(1);
}
});
}
async function analyzeRepository(repoPath, allFiles) {
const repoName = path.basename(repoPath);
// Create extension to language map
const extensionToLanguage = new Map();
for (const [language, extensions] of Object.entries(LANGUAGE_EXTENSIONS)) {
for (const ext of extensions) {
extensionToLanguage.set(ext, language);
}
}
// Analyze language distribution
const languageStats = {};
let totalLines = 0;
let codeFiles = 0;
let largestFile = '';
let largestFileLines = 0;
// Process files in batches to avoid memory issues
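// (e.g. 1,000 files → 20 sequential batches of 50 concurrent reads)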
const batchSize = 50;
for (let i = 0; i < allFiles.length; i += batchSize) {
const batch = allFiles.slice(i, i + batchSize);
await Promise.all(batch.map(async (file) => {
try {
const extension = path.extname(file).toLowerCase();
const language = extensionToLanguage.get(extension) || 'other';
if (!languageStats[language]) {
languageStats[language] = { files: 0, lines: 0 };
}
languageStats[language].files++;
// Only count lines for recognized languages, to avoid reading binary files
if (language !== 'other') {
codeFiles++;
const content = await fs.promises.readFile(file, 'utf-8');
const lineCount = content.split('\n').length;
languageStats[language].lines += lineCount;
totalLines += lineCount;
// Track largest file
if (lineCount > largestFileLines) {
largestFileLines = lineCount;
largestFile = path.relative(repoPath, file);
}
}
}
catch (error) {
// Skip files that can't be read (likely binary)
logger.debug(`Skipping file ${file}: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}));
}
// Calculate percentages
const languageBreakdown = {};
for (const [language, stats] of Object.entries(languageStats)) {
languageBreakdown[language] = {
files: stats.files,
lines: stats.lines,
percentage: totalLines > 0 ? Math.round((stats.lines / totalLines) * 100) : 0
};
}
// Estimate chunks based on code lines (assuming ~50 lines per chunk on average)
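// (e.g. 10,000 total code lines → Math.ceil(10000 / 50) = 200 chunks)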
const estimatedChunks = Math.ceil(totalLines / 50);
// Identify priority files (the ten largest recognized code files by byte size)
const priorityFiles = allFiles
.filter(file => {
const extension = path.extname(file).toLowerCase();
return extensionToLanguage.has(extension);
})
.map(file => ({
path: path.relative(repoPath, file),
size: fs.statSync(file).size
}))
.sort((a, b) => b.size - a.size)
.slice(0, 10)
.map(f => f.path);
return {
repository_info: {
name: repoName,
total_files: allFiles.length,
total_lines: totalLines,
analyzed_at: new Date().toISOString(),
analysis_version: '1.0.0-simplified'
},
language_breakdown: languageBreakdown,
summary: {
total_files: allFiles.length,
code_files: codeFiles,
languages_detected: Object.keys(languageBreakdown).length,
largest_file: largestFile,
largest_file_lines: largestFileLines
},
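// Note: chunking_strategy and embedding_models are fixed defaults in this
// simplified analyzer; only estimated_chunks and priority_files are derived
// from the scan above.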
vectorization_recommendations: {
chunking_strategy: {
clean_modules: "module_level",
complex_modules: "file_level",
monolithic_files: "sliding_window_with_high_overlap"
},
embedding_models: {
primary: "microsoft/graphcodebert-base",
fallback: "microsoft/codebert-base"
},
estimated_chunks: estimatedChunks,
priority_files: priorityFiles
}
};
}
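// Usage sketch (an assumption: this module is wired up by the remcode CLI
// entry point, and `program` is a commander Command instance, as the
// .command()/.option() chain above implies):
//
//   const { Command } = require('commander');
//   const { analyzeCommand } = require('./analyze');
//
//   const program = new Command();
//   analyzeCommand(program); // registers `analyze` and its options
//   program.parse(process.argv);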