remcode

Turn your AI assistant into a codebase expert: intelligent code analysis, semantic search, and software engineering guidance through MCP (Model Context Protocol) integration.

"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.IncrementalProcessor = void 0; const logger_1 = require("../utils/logger"); const state_manager_1 = require("./state-manager"); const pinecone_1 = require("../vectorizers/storage/pinecone"); const manager_1 = require("../vectorizers/chunkers/manager"); const manager_2 = require("../vectorizers/embedders/manager"); const fs = __importStar(require("fs")); const path = __importStar(require("path")); const logger = (0, logger_1.getLogger)('IncrementalProcessor'); class IncrementalProcessor { constructor(options) { this.storage = null; this.options = { ...options, batchSize: options.batchSize || 50, dryRun: options.dryRun || false, includeTests: options.includeTests || false, pineconeEnvironment: options.pineconeEnvironment || 'gcp-starter', pineconeNamespace: options.pineconeNamespace || 'default', embeddingModel: options.embeddingModel || 'text-embedding-ada-002' }; this.repoPath = options.repoPath; this.stateManager = new state_manager_1.StateManager(options.repoPath); // Initialize chunking manager with default strategy this.chunker = new manager_1.ChunkingManager({ clean_modules: 'function_level', complex_modules: 'class_level', monolithic_files: 'sliding_window_with_overlap' }); // Initialize embedding manager this.embeddingManager = new manager_2.EmbeddingManager({ primary: options.embeddingModel || 'microsoft/graphcodebert-base', fallback: 'sentence-transformers/all-MiniLM-L6-v2', batchSize: options.batchSize || 10, token: process.env.HUGGINGFACE_TOKEN }); // Initialize stats this.stats = { totalFiles: 0, addedFiles: 0, modifiedFiles: 0, deletedFiles: 0, totalChunks: 0, totalEmbeddings: 0, errorCount: 0, startTime: new Date() }; logger.info(`Initialized IncrementalProcessor with ${this.options.dryRun ? 
    /**
     * Initialize vector storage
     */
    async initialize() {
        // Skip initialization in dry run mode
        if (this.options.dryRun) {
            logger.info('Dry run mode: Skipping vector storage initialization');
            return;
        }
        logger.info('Initializing vector storage...');
        try {
            // Initialize Pinecone storage
            this.storage = new pinecone_1.PineconeStorage({
                apiKey: this.options.pineconeApiKey,
                indexName: this.options.pineconeIndexName,
                namespace: this.options.pineconeNamespace
            });
            await this.storage.initialize();
            logger.info('Vector storage initialized successfully');
        }
        catch (error) {
            const errorMsg = error instanceof Error ? error.message : String(error);
            logger.error(`Failed to initialize vector storage: ${errorMsg}`);
            throw new Error(`Vector storage initialization failed: ${errorMsg}`);
        }
    }
    /**
     * Process a batch of changed files incrementally
     */
    async processChangedFiles(changes, analyses) {
        const fileCount = changes.length;
        logger.info(`Processing ${fileCount} changed files incrementally`);
        this.stats.totalFiles = fileCount;
        // Map analyses by path for easy lookup
        const analysisMap = new Map();
        for (const analysis of analyses) {
            analysisMap.set(analysis.path, analysis);
        }
        // Process deleted files first
        const deletedFiles = changes.filter(change => change.status === 'deleted');
        this.stats.deletedFiles = deletedFiles.length;
        if (deletedFiles.length > 0) {
            logger.info(`Processing ${deletedFiles.length} deleted files`);
            for (const file of deletedFiles) {
                await this.deleteVectorsForFile(file.path);
            }
        }
        // Process added and modified files
        const activeFiles = changes.filter(change => change.status !== 'deleted');
        const addedFiles = changes.filter(change => change.status === 'added');
        const modifiedFiles = changes.filter(change => change.status === 'modified' || change.status === 'renamed');
        this.stats.addedFiles = addedFiles.length;
        this.stats.modifiedFiles = modifiedFiles.length;
        // Process in batches
        const batchSize = this.options.batchSize || 20;
        for (let i = 0; i < activeFiles.length; i += batchSize) {
            const batch = activeFiles.slice(i, i + batchSize);
            logger.info(`Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(activeFiles.length / batchSize)}`);
            // Process each file in the batch
            const processPromises = batch.map(async (file) => {
                const analysis = analysisMap.get(file.path);
                if (!analysis) {
                    logger.warn(`No analysis available for file: ${file.path}`);
                    return;
                }
                // Skip test files if not including tests
                if (analysis.category === 'test' && !this.options.includeTests) {
                    logger.debug(`Skipping test file: ${file.path}`);
                    return;
                }
                // Skip files marked for ignore
                if (analysis.category === 'ignore') {
                    logger.debug(`Skipping ignored file: ${file.path}`);
                    return;
                }
                try {
                    await this.processFile(file, analysis);
                }
                catch (error) {
                    const errorMsg = error instanceof Error ? error.message : String(error);
                    logger.error(`Error processing file ${file.path}: ${errorMsg}`);
                    this.stats.errorCount++;
                }
            });
            await Promise.all(processPromises);
        }
        // Update stats
        this.stats.endTime = new Date();
        this.stats.durationMs = this.stats.endTime.getTime() - this.stats.startTime.getTime();
        logger.info(`Completed incremental processing in ${this.stats.durationMs}ms`);
        logger.info(`Stats: ${JSON.stringify(this.stats, null, 2)}`);
        return this.stats;
    }
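    // Shapes of the inputs to processChangedFiles, inferred from how they are
    // used above (illustrative only; the authoritative TypeScript types live
    // in the package's source):
    //
    //   change:   { path: 'src/app.ts',
    //               status: 'added' | 'modified' | 'deleted' | 'renamed',
    //               previousPath?: 'src/old.ts' }
    //   analysis: { path: 'src/app.ts', language: 'typescript',
    //               category: 'code' | 'test' | 'ignore',
    //               size: 1234, chunkingStrategy: 'function_level' }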
    /**
     * Process a single file
     */
    async processFile(file, analysis) {
        logger.info(`Processing file: ${file.path}`);
        try {
            // 1. Read file content
            const filePath = path.join(this.repoPath, file.path);
            const content = fs.readFileSync(filePath, 'utf8');
            // 2. Chunk the content based on the chunking strategy
            const chunks = await this.chunker.chunkFile(content, analysis.chunkingStrategy, {
                file_path: file.path,
                relative_path: file.path,
                language: analysis.language,
                size: analysis.size,
                extension: path.extname(file.path)
            });
            this.stats.totalChunks += chunks.length;
            logger.debug(`Created ${chunks.length} chunks for ${file.path}`);
            // 3. Add metadata to chunks
            const enrichedChunks = chunks.map((chunk) => ({
                ...chunk,
                metadata: {
                    ...chunk.metadata,
                    file_path: file.path,
                    language: analysis.language,
                    category: analysis.category,
                    repo: path.basename(this.repoPath),
                    changeType: file.status
                }
            }));
            // In dry run mode, just log the chunks
            if (this.options.dryRun) {
                logger.info(`[DRY RUN] Would process ${enrichedChunks.length} chunks for ${file.path}`);
                return;
            }
            // If modified, first delete existing vectors
            if (file.status === 'modified' || file.status === 'renamed') {
                await this.deleteVectorsForFile(file.path);
                // If renamed, also delete vectors for the previous path
                if (file.status === 'renamed' && file.previousPath) {
                    await this.deleteVectorsForFile(file.previousPath);
                }
            }
            // 4. Generate embeddings
            const embeddings = await this.embeddingManager.embedChunks(enrichedChunks);
            this.stats.totalEmbeddings += embeddings.length;
            // 5. Store vectors in database
            if (this.storage && embeddings.length > 0) {
                // Convert CodeChunk[] to VectorData[] for storage
                const vectorData = embeddings
                    .filter(chunk => chunk.embedding && chunk.embedding.length > 0)
                    .map(chunk => ({
                        id: chunk.id,
                        embedding: chunk.embedding,
                        metadata: chunk.metadata
                    }));
                await this.storage.storeVectors(vectorData);
                logger.info(`Stored ${vectorData.length} vectors for ${file.path}`);
            }
        }
        catch (error) {
            const errorMsg = error instanceof Error ? error.message : String(error);
            logger.error(`Failed to process file ${file.path}: ${errorMsg}`);
            this.stats.errorCount++;
            throw error;
        }
    }
    /**
     * Delete vectors for a file
     */
    async deleteVectorsForFile(filePath) {
        logger.info(`Deleting vectors for file: ${filePath}`);
        if (this.options.dryRun) {
            logger.info(`[DRY RUN] Would delete vectors for file: ${filePath}`);
            return;
        }
        try {
            if (!this.storage) {
                throw new Error('Vector storage not initialized');
            }
            // Delete vectors by metadata filter
            await this.storage.deleteVectorsByMetadata({ file_path: filePath }, this.options.pineconeNamespace);
            logger.info(`Deleted vectors for file: ${filePath}`);
        }
        catch (error) {
            const errorMsg = error instanceof Error ? error.message : String(error);
            logger.error(`Failed to delete vectors for file ${filePath}: ${errorMsg}`);
            this.stats.errorCount++;
        }
    }
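    // Illustrative sketch of an enriched chunk's metadata after processFile
    // runs (keys taken from the enrichment step above; values are made up):
    //
    //   { file_path: 'src/app.ts', language: 'typescript', category: 'code',
    //     repo: 'my-repo', changeType: 'modified', ... }
    //
    // The same file_path key is what deleteVectorsForFile filters on when it
    // removes a file's stale vectors.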
    /**
     * Update processing state with the latest commit
     */
    async updateProcessingState(lastCommit) {
        logger.info(`Updating processing state to commit: ${lastCommit}`);
        if (this.options.dryRun) {
            logger.info(`[DRY RUN] Would update processing state to: ${lastCommit}`);
            return;
        }
        try {
            // Load current state
            const state = await this.stateManager.loadState() || {};
            // Update state with new commit and stats
            // Define default state with explicit type casting
            const stateObj = state;
            const updatedState = {
                ...stateObj,
                processing: {
                    // Ensure we have a default status if processing doesn't exist
                    status: (stateObj.processing && 'status' in stateObj.processing) ? stateObj.processing.status : 'updated',
                    // Include any existing processing properties
                    ...(stateObj.processing || {}),
                    // Add the new properties
                    lastCommit,
                    lastUpdated: new Date().toISOString(),
                    stats: this.stats
                }
            };
            // Save updated state
            await this.stateManager.updateState(updatedState);
            logger.info(`Successfully updated processing state to commit: ${lastCommit}`);
        }
        catch (error) {
            const errorMsg = error instanceof Error ? error.message : String(error);
            logger.error(`Failed to update processing state: ${errorMsg}`);
            throw new Error(`Failed to update processing state: ${errorMsg}`);
        }
    }
}
exports.IncrementalProcessor = IncrementalProcessor;
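
Below is a minimal usage sketch, assuming the compiled module layout shown above. The require path, repository path, index name, commit SHA, and the hard-coded changes/analyses arrays are hypothetical stand-ins for values that remcode's git-diff and analysis stages would normally supply; dryRun is enabled so nothing is written to Pinecone.

const { IncrementalProcessor } = require('./incremental'); // hypothetical module path

async function run() {
    const processor = new IncrementalProcessor({
        repoPath: '/path/to/repo',                    // placeholder path
        pineconeApiKey: process.env.PINECONE_API_KEY,
        pineconeIndexName: 'my-index',                // hypothetical index
        dryRun: true                                  // log instead of writing vectors
    });
    await processor.initialize();                     // no-op in dry run mode
    // In the real pipeline these come from change detection and file analysis;
    // hard-coded here purely for illustration.
    const changes = [{ path: 'src/app.ts', status: 'modified' }];
    const analyses = [{
        path: 'src/app.ts',
        language: 'typescript',
        category: 'code',
        size: 1024,
        chunkingStrategy: 'function_level'
    }];
    const stats = await processor.processChangedFiles(changes, analyses);
    console.log(stats);
    await processor.updateProcessingState('abc1234'); // hypothetical commit SHA
}

run().catch(console.error);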