remcode

Turn your AI assistant into a codebase expert. Intelligent code analysis, semantic search, and software engineering guidance through MCP integration.

"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.vectorizeCommand = vectorizeCommand; const chalk_1 = __importDefault(require("chalk")); const ora_1 = __importDefault(require("ora")); const fs = __importStar(require("fs")); const path = __importStar(require("path")); const source_1 = require("../utils/source"); const config_1 = require("../utils/config"); const pipeline_1 = require("../vectorizers/pipeline"); const logger_1 = require("../utils/logger"); const logger = (0, logger_1.getLogger)('VectorizeCommand'); function vectorizeCommand(program) { program .command('vectorize') .description('Vectorize a codebase using the new pipeline') .argument('<source>', 'Source codebase (GitHub URL or local path)') .option('-a, --analysis <path>', 'Path to analysis report', './codebase_analysis.json') .option('-o, --output <path>', 'Output path for vectorization report', './vectorization_report.md') .option('-c, --config <path>', 'Path to config file') .option('-p, --pinecone-key <key>', 'Pinecone API key') .option('-i, --index <name>', 'Pinecone index name') .option('-n, --namespace <name>', 'Pinecone namespace', 'default') .option('-e, --environment <env>', 'Pinecone environment', 'gcp-starter') .option('-m, --model <model>', 'Embedding model to use', 'microsoft/graphcodebert-base') .option('-f, --fallback <model>', 'Fallback embedding model', 'sentence-transformers/all-MiniLM-L6-v2') .option('-b, --batch-size <number>', 'Batch size for processing', '10') .option('-v, --verbose', 'Enable verbose output') .option('--timeout <seconds>', 'Maximum time for vectorization in seconds', '7200') .option('--cache <path>', 'Path to cache directory', './.remcode-cache') .action(async (source, options) => { const spinner = (0, ora_1.default)('Initializing vectorization...').start(); try { // Load configuration const config = (0, config_1.loadConfig)(options.config); // Resolve source path const resolvedSource = await (0, source_1.resolveSource)(source); const sourcePath = resolvedSource.path || source; if (!fs.existsSync(sourcePath)) { throw new Error(`Source path does not exist: ${sourcePath}`); } // Get API keys const pineconeApiKey = options.pineconeKey || 
process.env.PINECONE_API_KEY || config.vectorization?.storage?.pinecone?.apiKey; const huggingfaceToken = process.env.HUGGINGFACE_TOKEN || undefined; if (!pineconeApiKey) { throw new Error('Pinecone API key is required. Use --pinecone-key option or set PINECONE_API_KEY environment variable.'); } if (!huggingfaceToken) { throw new Error('HuggingFace token is required. Set HUGGINGFACE_TOKEN environment variable.'); } // Determine index name const indexName = options.index || config.vectorization?.storage?.indexes?.moduleName || `remcode-${path.basename(sourcePath).replace(/[^a-zA-Z0-9]/g, '-').toLowerCase()}`; spinner.text = 'Setting up vectorization pipeline...'; // Initialize vectorization pipeline const pipeline = new pipeline_1.VectorizationPipeline({ pineconeApiKey, pineconeIndexName: indexName, pineconeNamespace: options.namespace, pineconeEnvironment: options.environment, huggingfaceToken, embeddingModel: options.model, fallbackModel: options.fallback, batchSize: parseInt(options.batchSize), maxFileSize: 1024 * 1024, // 1MB includeExtensions: ['.ts', '.js', '.jsx', '.tsx', '.py', '.java', '.go', '.rb', '.php', '.cpp', '.c', '.cs', '.rs'], excludeExtensions: ['.min.js', '.bundle.js', '.test.js', '.spec.js', '.d.ts'], excludePaths: ['node_modules', '.git', 'dist', 'build', '__pycache__', '.pytest_cache', 'coverage'] }); await pipeline.initialize(); spinner.text = 'Processing codebase...'; // Process the directory const result = await pipeline.processDirectory(sourcePath); spinner.stop(); // Display results if (result.success) { console.log(chalk_1.default.green('āœ… Vectorization completed successfully!')); console.log(chalk_1.default.blue(`šŸ“ Files processed: ${result.filesProcessed}`)); console.log(chalk_1.default.blue(`🧩 Chunks created: ${result.chunksCreated}`)); console.log(chalk_1.default.blue(`šŸ”¢ Vectors stored: ${result.vectorsStored}`)); console.log(chalk_1.default.blue(`ā±ļø Duration: ${Math.round(result.duration / 1000)}s`)); if (result.errors.length > 0) { console.log(chalk_1.default.yellow(`āš ļø Warnings: ${result.errors.length} files had issues`)); if (options.verbose) { result.errors.forEach(error => console.log(chalk_1.default.yellow(` ${error}`))); } } } else { console.log(chalk_1.default.red('āŒ Vectorization failed')); console.log(chalk_1.default.red(`šŸ“ Files processed: ${result.filesProcessed}`)); console.log(chalk_1.default.red(`āŒ Errors: ${result.errors.length}`)); result.errors.forEach(error => console.log(chalk_1.default.red(` ${error}`))); } // Generate report await generateVectorizationReport(result, options.output, { source: sourcePath, indexName, namespace: options.namespace, model: options.model, batchSize: options.batchSize }); // Test search functionality if (result.success && result.vectorsStored > 0) { console.log(chalk_1.default.blue('\nšŸ” Testing search functionality...')); try { const searchResults = await pipeline.searchSimilarCode('function authentication', 3); console.log(chalk_1.default.green(`āœ… Search test successful: found ${searchResults.length} results`)); } catch (error) { console.log(chalk_1.default.yellow(`āš ļø Search test failed: ${error instanceof Error ? error.message : String(error)}`)); } } } catch (error) { spinner.stop(); const errorMessage = error instanceof Error ? 
error.message : String(error); console.error(chalk_1.default.red(`āŒ Vectorization failed: ${errorMessage}`)); logger.error(`Vectorization error: ${errorMessage}`); process.exit(1); } }); } async function generateVectorizationReport(result, outputPath, metadata) { const report = `# Vectorization Report ## Summary - **Status**: ${result.success ? 'āœ… Success' : 'āŒ Failed'} - **Files Processed**: ${result.filesProcessed} - **Chunks Created**: ${result.chunksCreated} - **Vectors Stored**: ${result.vectorsStored} - **Duration**: ${Math.round(result.duration / 1000)}s - **Errors**: ${result.errors.length} ## Configuration - **Source**: ${metadata.source} - **Index Name**: ${metadata.indexName} - **Namespace**: ${metadata.namespace} - **Embedding Model**: ${metadata.model} - **Batch Size**: ${metadata.batchSize} ## Errors ${result.errors.length > 0 ? result.errors.map((error) => `- ${error}`).join('\n') : 'No errors occurred.'} ## Next Steps ${result.success ? 'āœ… Your codebase has been successfully vectorized and is ready for semantic search!' : 'āŒ Please resolve the errors above and try again.'} --- Generated on: ${new Date().toISOString()} `; await fs.promises.writeFile(outputPath, report, 'utf8'); console.log(chalk_1.default.blue(`šŸ“„ Report saved to: ${outputPath}`)); }
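/*
 * Usage sketch (illustrative, not part of the compiled file): vectorizeCommand(program)
 * registers the `vectorize` subcommand on a commander-style program object. The chained
 * .command()/.argument()/.option()/.action() calls above match commander's API, which is
 * assumed here; the require path and script name below are hypothetical.
 *
 *   const { Command } = require('commander');
 *   const { vectorizeCommand } = require('remcode/dist/commands/vectorize'); // path is a guess
 *
 *   const program = new Command();
 *   vectorizeCommand(program);
 *   program.parse(process.argv);
 *
 * The action handler requires both credentials, so an invocation would look like:
 *
 *   PINECONE_API_KEY=... HUGGINGFACE_TOKEN=... node cli.js vectorize ./my-repo --index my-index
 */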