remcode
Version:
Turn your AI assistant into a codebase expert. Intelligent code analysis, semantic search, and software engineering guidance through MCP integration.
196 lines (190 loc) • 9.83 kB
JavaScript
// Compiler-emitted interop helper: exposes property `key` of `source` on
// `target` under the name `alias` (defaults to `key`). A helper already
// attached to `this` wins so the function is shared when one is provided.
// Kept as `var`, matching the other emitted helpers in this file.
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy-engine fallback: plain one-time value copy.
        return function (target, source, key, alias) {
            target[alias === undefined ? key : alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        var exposedName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether the source descriptor can be reused as-is or must be
        // replaced by a forwarding getter that re-reads `source[key]` on access.
        var useForwardingGetter;
        if (!descriptor) {
            useForwardingGetter = true;
        }
        else if ("get" in descriptor) {
            useForwardingGetter = !source.__esModule;
        }
        else {
            useForwardingGetter = descriptor.writable || descriptor.configurable;
        }
        if (useForwardingGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exposedName, descriptor);
    };
})();
// Compiler-emitted interop helper: stores `value` as the `default` member of
// `target`. Where Object.create exists the member is defined through
// Object.defineProperty (enumerable; writable/configurable are left at their
// defineProperty defaults); otherwise it is assigned directly.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (target, value) {
            Object.defineProperty(target, "default", { enumerable: true, value: value });
        };
    }
    return function (target, value) {
        target["default"] = value;
    };
})();
// Compiler-emitted interop helper backing `import * as ns from "..."`.
// Modules flagged `__esModule` are returned untouched; anything else is
// wrapped in a fresh object that re-exposes every own property except
// "default" and carries the original value as its `default` member.
var __importStar = (this && this.__importStar) || (function () {
    // The key lister is picked on first use, not at load time, and cached.
    var cachedKeyLister = null;
    function listOwnKeys(obj) {
        if (!cachedKeyLister) {
            cachedKeyLister = Object.getOwnPropertyNames || function (o) {
                var collected = [];
                for (var name in o) {
                    if (Object.prototype.hasOwnProperty.call(o, name)) collected[collected.length] = name;
                }
                return collected;
            };
        }
        return cachedKeyLister(obj);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            var index = 0;
            while (index < names.length) {
                var name = names[index];
                if (name !== "default") {
                    __createBinding(namespace, mod, name);
                }
                index++;
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
// Compiler-emitted interop helper backing `import x from "..."`: a module
// flagged `__esModule` is returned as-is, any other value is wrapped so it can
// be reached through `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag this CommonJS module with `__esModule` so the interop helpers above
// treat it as an ES-module-style export object.
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module (the function declaration further down is hoisted).
exports.vectorizeCommand = vectorizeCommand;
// Packages consumed through their default export (`chalk_1.default`, `ora_1.default`).
const chalk_1 = __importDefault(require("chalk"));
const ora_1 = __importDefault(require("ora"));
// Node built-ins, loaded as namespaces.
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
// Project-local modules.
const source_1 = require("../utils/source");
const config_1 = require("../utils/config");
const pipeline_1 = require("../vectorizers/pipeline");
const logger_1 = require("../utils/logger");
// Module-level logger obtained with the tag 'VectorizeCommand'.
const logger = (0, logger_1.getLogger)('VectorizeCommand');
/**
 * Registers the `vectorize` sub-command on a CLI program object that exposes
 * the chained `command/description/argument/option/action` builder
 * (Commander-style).
 *
 * When invoked, the command resolves the source, builds a
 * `VectorizationPipeline`, processes the source directory, prints a summary,
 * writes a Markdown report and, if vectors were stored, runs one sample search.
 *
 * Exit status: `process.exit(1)` on any thrown error; `process.exitCode = 1`
 * when the pipeline returns `success: false`.
 *
 * NOTE(review): `--analysis`, `--timeout` and `--cache` are registered here
 * but their values are never read by the handler below.
 *
 * @param {object} program - CLI program to attach the command to.
 * @returns {void}
 */
function vectorizeCommand(program) {
    program
        .command('vectorize')
        .description('Vectorize a codebase using the new pipeline')
        .argument('<source>', 'Source codebase (GitHub URL or local path)')
        .option('-a, --analysis <path>', 'Path to analysis report', './codebase_analysis.json')
        .option('-o, --output <path>', 'Output path for vectorization report', './vectorization_report.md')
        .option('-c, --config <path>', 'Path to config file')
        .option('-p, --pinecone-key <key>', 'Pinecone API key')
        .option('-i, --index <name>', 'Pinecone index name')
        .option('-n, --namespace <name>', 'Pinecone namespace', 'default')
        .option('-e, --environment <env>', 'Pinecone environment', 'gcp-starter')
        .option('-m, --model <model>', 'Embedding model to use', 'microsoft/graphcodebert-base')
        .option('-f, --fallback <model>', 'Fallback embedding model', 'sentence-transformers/all-MiniLM-L6-v2')
        .option('-b, --batch-size <number>', 'Batch size for processing', '10')
        .option('-v, --verbose', 'Enable verbose output')
        .option('--timeout <seconds>', 'Maximum time for vectorization in seconds', '7200')
        .option('--cache <path>', 'Path to cache directory', './.remcode-cache')
        .action(async (source, options) => {
            const spinner = (0, ora_1.default)('Initializing vectorization...').start();
            try {
                // Validate cheap CLI input first so a typo fails before any
                // source resolution or pipeline work starts.
                const batchSize = parseBatchSize(options.batchSize);
                // Load configuration
                const config = (0, config_1.loadConfig)(options.config);
                // Resolve source path
                const resolvedSource = await (0, source_1.resolveSource)(source);
                const sourcePath = resolvedSource.path || source;
                if (!fs.existsSync(sourcePath)) {
                    throw new Error(`Source path does not exist: ${sourcePath}`);
                }
                // Get API keys: CLI flag, then environment, then config file.
                const pineconeApiKey = options.pineconeKey ||
                    process.env.PINECONE_API_KEY ||
                    config.vectorization?.storage?.pinecone?.apiKey;
                const huggingfaceToken = process.env.HUGGINGFACE_TOKEN || undefined;
                if (!pineconeApiKey) {
                    throw new Error('Pinecone API key is required. Use --pinecone-key option or set PINECONE_API_KEY environment variable.');
                }
                if (!huggingfaceToken) {
                    throw new Error('HuggingFace token is required. Set HUGGINGFACE_TOKEN environment variable.');
                }
                // Determine index name; the fallback derives it from the
                // source directory name with non-alphanumerics turned into '-'.
                const indexName = options.index ||
                    config.vectorization?.storage?.indexes?.moduleName ||
                    `remcode-${path.basename(sourcePath).replace(/[^a-zA-Z0-9]/g, '-').toLowerCase()}`;
                spinner.text = 'Setting up vectorization pipeline...';
                // Initialize vectorization pipeline
                const pipeline = new pipeline_1.VectorizationPipeline({
                    pineconeApiKey,
                    pineconeIndexName: indexName,
                    pineconeNamespace: options.namespace,
                    pineconeEnvironment: options.environment,
                    huggingfaceToken,
                    embeddingModel: options.model,
                    fallbackModel: options.fallback,
                    batchSize,
                    maxFileSize: 1024 * 1024, // 1MB
                    includeExtensions: ['.ts', '.js', '.jsx', '.tsx', '.py', '.java', '.go', '.rb', '.php', '.cpp', '.c', '.cs', '.rs'],
                    excludeExtensions: ['.min.js', '.bundle.js', '.test.js', '.spec.js', '.d.ts'],
                    excludePaths: ['node_modules', '.git', 'dist', 'build', '__pycache__', '.pytest_cache', 'coverage']
                });
                await pipeline.initialize();
                spinner.text = 'Processing codebase...';
                // Process the directory
                const result = await pipeline.processDirectory(sourcePath);
                spinner.stop();
                // Display results
                printVectorizationSummary(result, options.verbose);
                // Generate report
                await generateVectorizationReport(result, options.output, {
                    source: sourcePath,
                    indexName,
                    namespace: options.namespace,
                    model: options.model,
                    batchSize
                });
                // Test search functionality (best effort: a failing probe is
                // reported as a warning and does not fail the command).
                if (result.success && result.vectorsStored > 0) {
                    console.log(chalk_1.default.blue('\n🔍 Testing search functionality...'));
                    try {
                        const searchResults = await pipeline.searchSimilarCode('function authentication', 3);
                        console.log(chalk_1.default.green(`✅ Search test successful: found ${searchResults.length} results`));
                    }
                    catch (error) {
                        console.log(chalk_1.default.yellow(`⚠️ Search test failed: ${error instanceof Error ? error.message : String(error)}`));
                    }
                }
                // A run the pipeline itself reports as failed must not look
                // successful to shells and CI.
                if (!result.success) {
                    process.exitCode = 1;
                }
            }
            catch (error) {
                spinner.stop();
                const errorMessage = error instanceof Error ? error.message : String(error);
                console.error(chalk_1.default.red(`❌ Vectorization failed: ${errorMessage}`));
                logger.error(`Vectorization error: ${errorMessage}`);
                process.exit(1);
            }
        });
}
/**
 * Parses the `--batch-size` value as a base-10 integer.
 *
 * @param {string|number} rawValue - Value as delivered by the option parser.
 * @returns {number} The parsed positive integer.
 * @throws {Error} When the value is not numeric or is not greater than zero.
 */
function parseBatchSize(rawValue) {
    const parsed = Number.parseInt(rawValue, 10);
    if (Number.isNaN(parsed) || parsed <= 0) {
        throw new Error(`Invalid batch size "${rawValue}": expected a positive integer.`);
    }
    return parsed;
}
/**
 * Prints the outcome of a pipeline run to the console.
 *
 * @param {object} result - Pipeline result; reads `success`, `filesProcessed`,
 *   `chunksCreated`, `vectorsStored`, `duration` and `errors`.
 * @param {boolean} [verbose] - When truthy, per-file warnings of a successful
 *   run are listed individually.
 * @returns {void}
 */
function printVectorizationSummary(result, verbose) {
    if (!result.success) {
        console.log(chalk_1.default.red('❌ Vectorization failed'));
        console.log(chalk_1.default.red(`📊 Files processed: ${result.filesProcessed}`));
        console.log(chalk_1.default.red(`❌ Errors: ${result.errors.length}`));
        result.errors.forEach(error => console.log(chalk_1.default.red(` ${error}`)));
        return;
    }
    console.log(chalk_1.default.green('✅ Vectorization completed successfully!'));
    console.log(chalk_1.default.blue(`📊 Files processed: ${result.filesProcessed}`));
    console.log(chalk_1.default.blue(`🧩 Chunks created: ${result.chunksCreated}`));
    console.log(chalk_1.default.blue(`🔢 Vectors stored: ${result.vectorsStored}`));
    console.log(chalk_1.default.blue(`⏱️ Duration: ${Math.round(result.duration / 1000)}s`));
    if (result.errors.length > 0) {
        console.log(chalk_1.default.yellow(`⚠️ Warnings: ${result.errors.length} files had issues`));
        if (verbose) {
            result.errors.forEach(error => console.log(chalk_1.default.yellow(` ${error}`)));
        }
    }
}
/**
 * Writes a Markdown summary of a vectorization run to `outputPath` and logs
 * where it was saved. The parent directory is created when it is missing.
 *
 * @param {object} result - Pipeline result; reads `success`, `filesProcessed`,
 *   `chunksCreated`, `vectorsStored`, `duration` (divided by 1000 and shown
 *   with an `s` suffix) and `errors` (array, one bullet per entry).
 * @param {string} outputPath - Destination file for the report.
 * @param {object} metadata - Run configuration echoed into the report; reads
 *   `source`, `indexName`, `namespace`, `model` and `batchSize`.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function generateVectorizationReport(result, outputPath, metadata) {
    const errorSection = result.errors.length > 0
        ? result.errors.map((error) => `- ${error}`).join('\n')
        : 'No errors occurred.';
    const nextSteps = result.success
        ? '✅ Your codebase has been successfully vectorized and is ready for semantic search!'
        : '❌ Please resolve the errors above and try again.';
    // Blank lines between sections matter: without one, the `---` rule would
    // turn the preceding sentence into a setext heading.
    const report = [
        '# Vectorization Report',
        '',
        '## Summary',
        '',
        `- **Status**: ${result.success ? '✅ Success' : '❌ Failed'}`,
        `- **Files Processed**: ${result.filesProcessed}`,
        `- **Chunks Created**: ${result.chunksCreated}`,
        `- **Vectors Stored**: ${result.vectorsStored}`,
        `- **Duration**: ${Math.round(result.duration / 1000)}s`,
        `- **Errors**: ${result.errors.length}`,
        '',
        '## Configuration',
        '',
        `- **Source**: ${metadata.source}`,
        `- **Index Name**: ${metadata.indexName}`,
        `- **Namespace**: ${metadata.namespace}`,
        `- **Embedding Model**: ${metadata.model}`,
        `- **Batch Size**: ${metadata.batchSize}`,
        '',
        '## Errors',
        '',
        errorSection,
        '',
        '## Next Steps',
        '',
        nextSteps,
        '',
        '---',
        '',
        `Generated on: ${new Date().toISOString()}`,
        '',
    ].join('\n');
    // `recursive: true` makes this a no-op when the directory already exists.
    await fs.promises.mkdir(path.dirname(outputPath), { recursive: true });
    await fs.promises.writeFile(outputPath, report, 'utf8');
    console.log(chalk_1.default.blue(`📄 Report saved to: ${outputPath}`));
}
;