UNPKG

context-rag

Version:

Get relevant project context for AI agents to save 90% of tokens. Lightweight CLI tool for semantic search on project codebases.

213 lines (171 loc) 5.92 kB
const path = require('path');
const fs = require('fs');
const crypto = require('crypto');
const { performance } = require('perf_hooks');
const chalk = require('chalk');

// For now, we'll use a JavaScript implementation until Rust module is compiled

/** Extension of the index file that is swapped for the embeddings suffix. */
const INDEX_EXTENSION = '.db';

/** Suffix of the file that stores the generated embeddings. */
const EMBEDDINGS_SUFFIX = '_embeddings.json';

/**
 * Builds the on-disk index for a project: scans a directory tree, splits every
 * matching file into text chunks, writes the result as JSON to the configured
 * index path, and then asks the embedding service to embed those chunks.
 */
class ContextRagIndexer {
  /**
   * @param {object} config - Indexer configuration. This class reads
   *   `config.storage.path` (index file location), `config.index.include` and
   *   `config.index.exclude` (pattern lists) and `config.embedder.model`; the
   *   whole object is also handed to `EmbeddingService`.
   */
  constructor(config) {
    this.config = config;
    this.indexPath = config.storage.path;
    this.embeddingService = null; // Will be initialized when needed
  }

  /**
   * Indexes every included file below `targetPath`.
   *
   * Files that cannot be read or stat'ed are skipped with a warning rather
   * than aborting the run. The index JSON is written before embeddings are
   * generated, so it is on disk even if embedding generation fails.
   *
   * @param {string} [targetPath='.'] - Directory to scan.
   * @param {object} [options={}] - Currently unused; kept for interface
   *   compatibility.
   * @returns {Promise<{indexed_files: number, total_chunks: number, processing_time_ms: number}>}
   *   Counts of indexed files and chunks plus the elapsed time, rounded to
   *   whole milliseconds.
   * @throws {Error} If `targetPath` cannot be listed or the index file cannot
   *   be written.
   */
  async indexDirectory(targetPath = '.', options = {}) {
    const startTime = performance.now();
    let indexedFiles = 0;
    let totalChunks = 0;

    console.log(chalk.blue('🔍 Scanning files...'));
    const files = await this.scanFiles(targetPath);
    console.log(chalk.gray(`Found ${files.length} files to process`));

    const indexData = {
      files: {},
      chunks: [],
      metadata: {
        created: new Date().toISOString(),
        version: '0.1.0',
      },
    };

    for (const filePath of files) {
      try {
        // Everything that can throw runs before indexData is touched, so a
        // skipped file never leaves a partial entry behind.
        const content = fs.readFileSync(filePath, 'utf8');
        const fileHash = crypto.createHash('sha256').update(content).digest('hex');
        const modifiedTime = fs.statSync(filePath).mtime.getTime();
        const chunks = this.chunkContent(content);

        indexData.files[filePath] = {
          hash: fileHash,
          modified: modifiedTime,
          chunks: chunks.length,
        };

        chunks.forEach((chunk, index) => {
          indexData.chunks.push({
            file_path: filePath,
            content: chunk,
            chunk_index: index,
            file_hash: fileHash,
            modified_time: modifiedTime,
          });
          totalChunks++;
        });

        indexedFiles++;
      } catch (error) {
        console.warn(chalk.yellow(`⚠️ Skipped ${filePath}: ${error.message}`));
      }
    }

    // Save index to file
    const indexDir = path.dirname(this.indexPath);
    if (!fs.existsSync(indexDir)) {
      fs.mkdirSync(indexDir, { recursive: true });
    }
    fs.writeFileSync(this.indexPath, JSON.stringify(indexData, null, 2));

    // Generate embeddings for the chunks
    console.log(chalk.blue('🧠 Generating embeddings...'));
    await this.generateEmbeddings(indexData.chunks);

    const processingTime = performance.now() - startTime;

    return {
      indexed_files: indexedFiles,
      total_chunks: totalChunks,
      processing_time_ms: Math.round(processingTime),
    };
  }

  /**
   * Recursively collects the paths of all files under `targetPath` that pass
   * `shouldIncludeFile`, descending only into directories that pass
   * `shouldIncludeDirectory`. Entries that are neither regular files nor
   * directories are ignored.
   *
   * A nested directory that cannot be listed is skipped with a warning, the
   * same way unreadable files are handled during indexing. A failure to list
   * `targetPath` itself is rethrown.
   *
   * @param {string} targetPath - Root directory of the scan.
   * @returns {Promise<string[]>} Matching file paths, each built by joining
   *   onto `targetPath`.
   * @throws {Error} If `targetPath` cannot be listed.
   */
  async scanFiles(targetPath) {
    const files = [];

    const scanDir = (dir, isRoot) => {
      let entries;
      try {
        entries = fs.readdirSync(dir, { withFileTypes: true });
      } catch (error) {
        if (isRoot) {
          throw error;
        }
        console.warn(chalk.yellow(`⚠️ Skipped ${dir}: ${error.message}`));
        return;
      }

      for (const entry of entries) {
        const fullPath = path.join(dir, entry.name);

        if (entry.isDirectory()) {
          if (this.shouldIncludeDirectory(fullPath)) {
            scanDir(fullPath, false);
          }
        } else if (entry.isFile() && this.shouldIncludeFile(fullPath)) {
          files.push(fullPath);
        }
      }
    };

    scanDir(targetPath, true);
    return files;
  }

  /**
   * Tells whether a path contains any of the configured exclude patterns.
   * Matching is a plain substring test, not a glob.
   *
   * @param {string} candidatePath - File or directory path to test.
   * @returns {boolean} `true` when at least one exclude pattern occurs in the path.
   */
  isExcluded(candidatePath) {
    const { exclude } = this.config.index;

    for (const excludePattern of exclude) {
      if (candidatePath.includes(excludePattern)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Decides whether a file belongs in the index. Exclusions win over
   * inclusions.
   *
   * Include patterns of the form `*.ext` match when the path ends with
   * `.ext`; every other include pattern matches as a plain substring of the
   * path.
   *
   * @param {string} filePath - Path of the candidate file.
   * @returns {boolean} `true` when the file is not excluded and matches at
   *   least one include pattern.
   */
  shouldIncludeFile(filePath) {
    // Check exclusions first
    if (this.isExcluded(filePath)) {
      return false;
    }

    // Check inclusions
    const { include } = this.config.index;
    for (const includePattern of include) {
      if (includePattern.startsWith('*.')) {
        // Extension pattern: drop the leading '*' and keep the '.ext' suffix.
        if (filePath.endsWith(includePattern.slice(1))) {
          return true;
        }
      } else if (filePath.includes(includePattern)) {
        // Directory and filename patterns are both substring matches.
        return true;
      }
    }

    return false;
  }

  /**
   * Decides whether the scan should descend into a directory.
   *
   * @param {string} dirPath - Path of the candidate directory.
   * @returns {boolean} `false` when the path contains any exclude pattern.
   */
  shouldIncludeDirectory(dirPath) {
    return !this.isExcluded(dirPath);
  }

  /**
   * Splits text into chunks along line boundaries.
   *
   * Lines are accumulated until adding the next one would push the chunk past
   * 1000 characters; the chunk is then closed (trimmed) and a new one begins.
   * Lines are never split, so a single line longer than the limit yields an
   * oversized chunk. Chunks that are empty after trimming are not emitted.
   *
   * @param {string} content - Text to split.
   * @returns {string[]} The chunks in order. When nothing non-blank was
   *   produced (empty or whitespace-only input), a one-element array holding
   *   the original `content` is returned instead.
   */
  chunkContent(content) {
    const MAX_CHUNK_SIZE = 1000;
    const chunks = [];
    let currentChunk = '';

    for (const line of content.split('\n')) {
      const wouldOverflow = currentChunk.length + line.length > MAX_CHUNK_SIZE;
      if (wouldOverflow && currentChunk.length > 0) {
        chunks.push(currentChunk.trim());
        currentChunk = '';
      }
      currentChunk += `${line}\n`;
    }

    const lastChunk = currentChunk.trim();
    if (lastChunk.length > 0) {
      chunks.push(lastChunk);
    }

    return chunks.length > 0 ? chunks : [content];
  }

  /**
   * Derives the embeddings file path from the index path.
   *
   * Only a trailing `.db` is swapped for `_embeddings.json`. A string-pattern
   * `replace('.db', ...)` would rewrite the first `.db` found anywhere in the
   * path (for example inside a directory name). When the index path does not
   * end in `.db`, the suffix is appended so the embeddings file can never be
   * the index file itself.
   *
   * NOTE(review): any other module that locates the embeddings file must
   * derive the path with the same rule — confirm against the readers.
   *
   * @returns {string} Path of the embeddings JSON file.
   */
  getEmbeddingsPath() {
    const basePath = this.indexPath.endsWith(INDEX_EXTENSION)
      ? this.indexPath.slice(0, -INDEX_EXTENSION.length)
      : this.indexPath;
    return `${basePath}${EMBEDDINGS_SUFFIX}`;
  }

  /**
   * Generates embeddings for the given chunks and writes them, together with
   * the configured model name, to the embeddings file.
   *
   * Failures while generating or saving the embeddings are reported as
   * warnings and do not reject. Loading or constructing the embedding service
   * happens outside that guard and can still throw.
   *
   * @param {object[]} chunks - Chunk records as built by `indexDirectory`.
   * @returns {Promise<void>}
   */
  async generateEmbeddings(chunks) {
    if (!this.embeddingService) {
      // Required on first use rather than at module load.
      const { EmbeddingService } = require('./embedder');
      this.embeddingService = new EmbeddingService(this.config);
    }

    try {
      const embeddedChunks = await this.embeddingService.generateEmbeddings(chunks);

      // Save embeddings separately for better performance
      const embeddingsPath = this.getEmbeddingsPath();
      const embeddingsData = {
        model: this.config.embedder.model,
        chunks: embeddedChunks,
      };
      fs.writeFileSync(embeddingsPath, JSON.stringify(embeddingsData, null, 2));

      console.log(chalk.green(`✅ Generated embeddings for ${embeddedChunks.length} chunks`));
      console.log(chalk.gray(`💾 Embeddings saved to: ${embeddingsPath}`));
    } catch (error) {
      console.warn(chalk.yellow(`⚠️ Failed to generate embeddings: ${error.message}`));
      console.log(chalk.gray('Search functionality will be limited without embeddings'));
    }
  }
}

module.exports = { ContextRagIndexer };