UNPKG

ai-index

Version:

AI-powered local code indexing and search system for any codebase

543 lines (449 loc) • 15.8 kB
#!/usr/bin/env node

import { globby } from 'globby';
import { realpathSync } from 'fs';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath, pathToFileURL } from 'url';
import { loadConfig } from './config.js';
import { createLocalEmbedder } from './local-embedder.js';
import { createLocalVectorStore } from './local-vector-store.js';
import { CodeAnalyzer } from './code-analyzer.js';
import { FileMonitor } from './file-monitor.js';
import { execSync } from 'child_process';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Indexes the JavaScript files of a code base into a local vector store,
 * keeping in-memory symbol, file and import indexes alongside it.
 *
 * Files are either discovered by globbing the root folder or by following
 * relative imports starting from explicit entry points. In watch mode a
 * FileMonitor triggers re-indexing of changed files and cleanup of deleted ones.
 */
export class SmartIndexer {
  /**
   * @param {object} [options]
   * @param {string} [options.rootPath] - Folder to index; defaults to the current working directory.
   * @param {string} [options.indexName] - Index name; defaults to the sanitized basename of rootPath.
   * @param {string[]} [options.entryPoints] - Files to start import-following from; empty means "index everything".
   * @param {boolean} [options.watch] - Enable continuous file monitoring after the initial index.
   */
  constructor(options = {}) {
    this.rootPath = options.rootPath || process.cwd();
    this.indexName =
      options.indexName || path.basename(this.rootPath).replace(/[^a-zA-Z0-9_-]/g, '_');
    this.entryPoints = options.entryPoints || [];
    this.watchMode = options.watch || false;

    this.analyzer = new CodeAnalyzer();
    this.monitor = null;
    this.embedder = null;
    this.vectorStore = null;
    this.config = null;

    // In-memory indexes, keyed by the path under which a file is indexed.
    this.symbolIndex = new Map(); // `${filePath}:${symbolName}` -> symbol record
    this.fileIndex = new Map(); // filePath -> per-file summary
    this.importGraph = new Map(); // filePath -> { imports, exports, symbols }
  }

  /**
   * Loads the configuration, creates the embedder and the vector store, and
   * sets up the file monitor when watch mode is enabled.
   */
  async initialize() {
    console.log('🧠 Initializing Smart Indexer...');

    // Load configuration
    this.config = await loadConfig();

    // Initialize embedder and vector store
    this.embedder = await createLocalEmbedder(this.config);
    this.vectorStore = await createLocalVectorStore(this.config, this.indexName);

    console.log(`📊 Index: ${this.indexName}`);
    console.log(`📁 Root: ${this.rootPath}`);
    console.log(`🔢 Embedding dimensions: ${this.embedder.getDimensions()}`);

    if (this.watchMode) {
      this.setupFileMonitor();
    }
  }

  /**
   * Creates the FileMonitor and wires its 'files-to-index' and 'files-deleted'
   * events to re-indexing and cleanup.
   *
   * The handlers are async event listeners, so nothing awaits them; every
   * failure is caught and logged here to avoid unhandled promise rejections.
   */
  setupFileMonitor() {
    this.monitor = new FileMonitor({
      rootPath: this.rootPath,
      debounceDelay: 2000
    });

    this.monitor.on('files-to-index', async (files) => {
      console.log(`\n🔄 Reindexing ${files.length} changed files...`);
      try {
        for (const file of files) {
          try {
            await this.indexFile(file.fullPath, file.content, file.path);
          } catch (error) {
            // One broken file must not abort the rest of the batch.
            console.error(`Error indexing ${file.path}:`, error.message);
          }
        }
        await this.updateManifest();
        console.log('✅ Reindexing complete');
      } catch (error) {
        console.error('Error during reindexing:', error.message);
      }
    });

    this.monitor.on('files-deleted', async (filePaths) => {
      console.log(`\n🗑️ Removing ${filePaths.length} deleted files from index...`);
      for (const filePath of filePaths) {
        try {
          await this.removeFileFromIndex(filePath);
        } catch (error) {
          console.error(`Error removing ${filePath}:`, error.message);
        }
      }
      console.log('✅ Cleanup complete');
    });
  }

  /**
   * Runs the initial indexing pass (from entry points if any were given,
   * otherwise over all files) and, in watch mode, starts the file monitor.
   */
  async start() {
    await this.initialize();

    if (this.entryPoints.length > 0) {
      await this.indexFromEntryPoints();
    } else {
      await this.indexAllFiles();
    }

    if (this.watchMode) {
      console.log('\n👁️ Starting continuous monitoring...');
      await this.monitor.start();

      // Stop the monitor on Ctrl+C; exit even if stopping fails.
      process.on('SIGINT', async () => {
        console.log('\n⏹️ Stopping indexer...');
        try {
          await this.monitor.stop();
        } catch (error) {
          console.error('Error stopping monitor:', error.message);
        } finally {
          process.exit(0);
        }
      });
    }
  }

  /**
   * Converts an absolute path into the root-relative, forward-slash form used
   * as the key of the in-memory indexes (the same shape globby yields in
   * indexAllFiles).
   *
   * @param {string} fullPath - Absolute file path.
   * @returns {string} Path relative to rootPath using '/' separators.
   */
  toIndexPath(fullPath) {
    return path.relative(this.rootPath, fullPath).split(path.sep).join('/');
  }

  /**
   * Indexes the entry points and, breadth-first, every JavaScript file
   * reachable from them through relative imports.
   *
   * Files are tracked by absolute path so an entry point given as a relative
   * path is not indexed a second time when another file imports it. Indexing
   * goes through indexFile so fileIndex and the manifest statistics are kept
   * in step with indexAllFiles.
   */
  async indexFromEntryPoints() {
    console.log('\n🎯 Indexing from entry points...');

    const visited = new Set();
    const queue = this.entryPoints.map((entry) => path.resolve(this.rootPath, entry));
    let indexed = 0;

    // The queue grows while it is being walked; an index cursor avoids shift().
    for (let cursor = 0; cursor < queue.length; cursor++) {
      const fullPath = queue[cursor];
      if (visited.has(fullPath)) continue;
      visited.add(fullPath);

      const indexPath = this.toIndexPath(fullPath);
      try {
        const content = await fs.readFile(fullPath, 'utf-8');
        const analysis = await this.indexFile(fullPath, content, indexPath);
        if (!analysis) continue;
        indexed++;

        // Add imported files to visit queue (only relative imports of JavaScript files)
        for (const imp of analysis.imports) {
          if (!imp.source.startsWith('.')) continue;
          const resolvedPath = await this.resolveImport(fullPath, imp.source);
          if (
            resolvedPath &&
            !visited.has(resolvedPath) &&
            this.isJavaScriptFile(resolvedPath)
          ) {
            queue.push(resolvedPath);
          }
        }
      } catch (error) {
        console.error(`Error indexing ${indexPath}:`, error.message);
      }
    }

    await this.updateManifest();
    console.log(`✅ Indexed ${indexed} files from entry points`);
  }

  /**
   * Indexes every `.js`/`.mjs` file under rootPath, skipping common build and
   * test artifacts as well as git-ignored files, then writes the manifest.
   */
  async indexAllFiles() {
    console.log('\n📂 Indexing JavaScript files (respecting .gitignore)...');

    // Only JavaScript files for now as requested
    const patterns = ['**/*.{js,mjs}'];
    const ignorePatterns = [
      '**/node_modules/**',
      '**/dist/**',
      '**/build/**',
      '**/*.min.js',
      '**/*.test.js',
      '**/*.spec.js'
    ];

    // Add git-ignored files to ignore patterns
    const gitIgnored = await this.getGitIgnoredFiles();
    ignorePatterns.push(...gitIgnored);

    const files = await globby(patterns, {
      cwd: this.rootPath,
      ignore: ignorePatterns,
      absolute: false,
      gitignore: true
    });

    console.log(`Found ${files.length} JavaScript files to index`);

    let processed = 0;
    for (const file of files) {
      const fullPath = path.join(this.rootPath, file);
      try {
        const content = await fs.readFile(fullPath, 'utf-8');
        await this.indexFile(fullPath, content, file);
        processed++;

        if (processed % 50 === 0) {
          console.log(`Processed ${processed}/${files.length} files...`);
        }
      } catch (error) {
        console.error(`Error indexing ${file}:`, error.message);
      }
    }

    await this.updateManifest();
    console.log(`✅ Indexed ${processed} files successfully`);
  }

  /**
   * Analyzes and indexes a single file, replacing whatever was previously
   * indexed under the same relative path.
   *
   * @param {string} fullPath - Path handed to the analyzer.
   * @param {string} content - File contents.
   * @param {string} relativePath - Key under which the file is indexed.
   * @returns {Promise<object|null>} The analyzer result, or null when the
   *   analyzer produced nothing and the file was therefore not indexed.
   */
  async indexFile(fullPath, content, relativePath) {
    const analysis = await this.analyzer.analyzeFile(fullPath, content);
    if (!analysis) {
      return null;
    }

    // Remove old chunks and symbols if the file was previously indexed, so
    // symbols deleted from the file do not linger in the symbol index.
    if (this.fileIndex.has(relativePath)) {
      await this.vectorStore.removeDocumentsByFile(relativePath);
      this.removeSymbolsForFile(relativePath);
    }

    await this.indexAnalysis(analysis, content, relativePath);

    // Update file index
    this.fileIndex.set(relativePath, {
      symbols: analysis.symbols.map((s) => s.name),
      imports: analysis.imports.map((i) => i.source),
      exports: analysis.exports.map((e) => e.name).filter((n) => n),
      complexity: analysis.complexity,
      lastIndexed: new Date().toISOString()
    });

    return analysis;
  }

  /**
   * Turns an analysis result into vector-store documents (one per chunk) and
   * records the file's symbols and imports in the in-memory indexes.
   *
   * @param {object} analysis - Result of CodeAnalyzer.analyzeFile; this method
   *   reads its chunks, symbols, imports, exports, complexity, usages,
   *   references, language and filePath fields.
   * @param {string} content - File contents (not used by this method).
   * @param {string|null} [relativePath] - Index key; falls back to analysis.filePath.
   */
  async indexAnalysis(analysis, content, relativePath = null) {
    const filePath = relativePath || analysis.filePath;
    const fileSymbols = analysis.symbols.map((s) => s.name);
    const fileImports = analysis.imports.map((i) => i.source);
    const fileExports = analysis.exports.map((e) => e.name).filter((n) => n);
    const area = this.inferArea(filePath);
    const documents = [];

    // Create enriched chunks with semantic understanding
    for (const chunk of analysis.chunks) {
      // Tolerate chunks that carry no metadata object.
      const metadata = chunk.metadata ?? {};
      const embedding = await this.createEnrichedEmbedding(chunk, analysis, filePath);

      documents.push({
        id: `${filePath}:${chunk.startLine}:${chunk.type}`,
        repo_path: filePath,
        content: chunk.content,
        embedding,

        // Enhanced metadata
        chunk_type: chunk.type,
        symbol_name: chunk.symbolName,
        symbol_type: chunk.symbolType,
        start_line: chunk.startLine,
        end_line: chunk.endLine,

        // Code structure metadata
        complexity: metadata.complexity || analysis.complexity,
        async: metadata.async || false,
        params: metadata.params || [],
        methods: metadata.methods || [],
        extends: metadata.extends,

        // Relationship metadata
        imports: chunk.type === 'imports' ? metadata.sources : [],
        exports: chunk.type === 'exports' ? metadata.exportedSymbols : [],

        // File context
        file_symbols: fileSymbols,
        file_imports: fileImports,
        file_exports: fileExports,

        // Usage tracking for better search context
        usages: analysis.usages || [],
        references: analysis.references || [],

        language: analysis.language,
        area
      });
    }

    // Index documents in vector store
    if (documents.length > 0) {
      await this.vectorStore.addDocuments(documents);
    }

    // Update symbol index
    for (const symbol of analysis.symbols) {
      this.symbolIndex.set(`${filePath}:${symbol.name}`, {
        ...symbol,
        filePath,
        references: [],
        callers: [],
        callees: []
      });
    }

    // Update import graph
    this.importGraph.set(filePath, {
      imports: analysis.imports,
      exports: analysis.exports,
      symbols: analysis.symbols
    });
  }

  /**
   * Builds the text that is embedded for a chunk: file and language header,
   * optional symbol header, the chunk content, and (for symbol chunks) the
   * files already known to import that symbol.
   *
   * @param {object} chunk - A chunk from analysis.chunks.
   * @param {object} analysis - The analysis the chunk belongs to.
   * @param {string} [indexPath] - Key under which the file is stored in the
   *   import graph; used to exclude the file itself from "Used by". Defaults
   *   to analysis.filePath.
   * @returns {Promise<*>} Whatever embedder.embed resolves to.
   */
  async createEnrichedEmbedding(chunk, analysis, indexPath = analysis.filePath) {
    // Create context-aware text for embedding
    let enrichedText = chunk.content;

    // Add symbol context
    if (chunk.symbolName) {
      enrichedText = `${chunk.symbolType} ${chunk.symbolName}\n${enrichedText}`;
    }

    // Add file context
    const fileContext = `File: ${analysis.filePath} Language: ${analysis.language}`;
    enrichedText = `${fileContext}\n${enrichedText}`;

    // Add relationship context
    if (chunk.type === 'symbol' && chunk.symbolName) {
      const usedBy = this.findSymbolReferences(chunk.symbolName, indexPath);
      if (usedBy.length > 0) {
        enrichedText += `\nUsed by: ${usedBy.join(', ')}`;
      }
    }

    return await this.embedder.embed(enrichedText);
  }

  /**
   * Lists the files in the import graph, other than filePath itself, that
   * import a specifier named symbolName (by imported or local name).
   *
   * @param {string} symbolName - Symbol to look for.
   * @param {string} filePath - Import-graph key of the defining file.
   * @returns {string[]} Referencing files, each listed once, in graph order.
   */
  findSymbolReferences(symbolName, filePath) {
    const references = new Set();

    // Look through import graph for references
    for (const [file, fileData] of this.importGraph) {
      if (file === filePath) continue;

      const importsSymbol = fileData.imports.some(
        (imp) =>
          imp.specifiers &&
          imp.specifiers.some(
            (spec) => spec.imported === symbolName || spec.local === symbolName
          )
      );
      if (importsSymbol) {
        references.add(file);
      }
    }

    return [...references];
  }

  /**
   * Deletes every symbol-index entry that belongs to the given file.
   *
   * @param {string} filePath - Index key of the file.
   */
  removeSymbolsForFile(filePath) {
    // Deleting the current entry while iterating a Map is well-defined in JS.
    for (const [id, symbol] of this.symbolIndex) {
      if (symbol.filePath === filePath) {
        this.symbolIndex.delete(id);
      }
    }
  }

  /**
   * Removes a file from the vector store and from all in-memory indexes.
   *
   * @param {string} relativePath - Index key of the file to remove.
   */
  async removeFileFromIndex(relativePath) {
    // Remove from vector store
    await this.vectorStore.removeDocumentsByFile(relativePath);

    // Remove from indexes
    this.fileIndex.delete(relativePath);
    this.importGraph.delete(relativePath);

    // Remove symbols
    this.removeSymbolsForFile(relativePath);
  }

  /**
   * Classifies a file into a coarse area based on directory names in its path.
   *
   * The path is normalized to forward slashes and given a leading '/', so
   * top-level directories ('utils/x.js') and Windows-style separators match
   * the same way as nested POSIX paths.
   *
   * @param {string} filePath - Relative or absolute file path.
   * @returns {'backend'|'frontend'|'utils'|'types'|'tests'|'other'}
   */
  inferArea(filePath) {
    const p = `/${String(filePath).replace(/\\/g, '/').toLowerCase()}`;
    const inAnyDir = (...dirs) => dirs.some((dir) => p.includes(`/${dir}/`));

    if (inAnyDir('api', 'server', 'backend')) {
      return 'backend';
    }
    if (inAnyDir('components', 'pages', 'frontend')) {
      return 'frontend';
    }
    if (inAnyDir('utils', 'lib', 'helpers')) {
      return 'utils';
    }
    if (inAnyDir('types') || p.endsWith('.d.ts')) {
      return 'types';
    }
    if (inAnyDir('test', '__tests__')) {
      return 'tests';
    }
    return 'other';
  }

  /**
   * Writes `ai_index/manifest.json` under rootPath with index metadata and
   * statistics gathered from the vector store, the analyzer and the
   * in-memory indexes.
   */
  async updateManifest() {
    const stats = await this.vectorStore.getStats();
    const codeGraph = this.analyzer.getCodeGraph();

    const manifest = {
      mode: 'smart',
      index: this.indexName,
      folder: this.rootPath,
      embed_model: this.config.EMBED_MODEL,
      last_built: new Date().toISOString(),
      stats: {
        total_files: this.fileIndex.size,
        total_symbols: this.symbolIndex.size,
        total_chunks: stats.documentCount,
        total_imports: codeGraph.imports.length,
        complexity_average: this.calculateAverageComplexity()
      },
      entry_points: this.entryPoints,
      watch_mode: this.watchMode
    };

    const manifestPath = path.join(this.rootPath, 'ai_index/manifest.json');
    await fs.mkdir(path.dirname(manifestPath), { recursive: true });
    await fs.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
  }

  /**
   * Asks git for the untracked files that are ignored under rootPath.
   *
   * Best-effort: any failure of the git commands (not a repository, git not
   * installed, output larger than execSync's buffer, ...) yields an empty list.
   *
   * @returns {Promise<string[]>} Ignored file paths as printed by git.
   */
  async getGitIgnoredFiles() {
    try {
      // Check if this is a git repository
      execSync('git rev-parse --git-dir', {
        cwd: this.rootPath,
        stdio: 'pipe'
      });

      // Get list of ignored files
      const output = execSync('git ls-files --others --ignored --exclude-standard', {
        cwd: this.rootPath,
        encoding: 'utf-8',
        stdio: 'pipe'
      });

      return output
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line);
    } catch {
      // Not a git repository or git not available
      return [];
    }
  }

  /**
   * Resolves a relative import specifier to an existing file.
   *
   * Candidates are tried in order: the path as written (only when it already
   * has an extension), then with `.js` / `.mjs` appended, then `index.js` /
   * `index.mjs` inside it.
   *
   * @param {string} fromFile - Path of the importing file.
   * @param {string} importPath - Import specifier as written in the source.
   * @returns {Promise<string|null>} Absolute path of the first accessible
   *   candidate, or null when none exists.
   */
  async resolveImport(fromFile, importPath) {
    const fullImportPath = path.resolve(path.dirname(fromFile), importPath);

    const candidates = [
      // If import already has extension, try it first
      ...(path.extname(importPath) ? [fullImportPath] : []),
      // Try adding extensions (only JS now)
      `${fullImportPath}.js`,
      `${fullImportPath}.mjs`,
      // Try index files
      path.join(fullImportPath, 'index.js'),
      path.join(fullImportPath, 'index.mjs')
    ];

    for (const candidate of candidates) {
      try {
        await fs.access(candidate);
        return candidate;
      } catch {
        // Candidate doesn't exist, try the next one
      }
    }

    return null; // Couldn't resolve import
  }

  /**
   * @param {string} filePath
   * @returns {boolean} True when the extension is `.js` or `.mjs` (case-insensitive).
   */
  isJavaScriptFile(filePath) {
    const ext = path.extname(filePath).toLowerCase();
    return ['.js', '.mjs'].includes(ext);
  }

  /**
   * Averages the complexity of all indexed files that report a truthy
   * complexity value.
   *
   * @returns {number} Average rounded to two decimals, or 0 when no file
   *   contributes. Always a number, so the manifest field has a stable type.
   */
  calculateAverageComplexity() {
    let total = 0;
    let count = 0;

    for (const file of this.fileIndex.values()) {
      if (file.complexity) {
        total += file.complexity;
        count++;
      }
    }

    return count > 0 ? Number((total / count).toFixed(2)) : 0;
  }

  /**
   * @returns {{files: number, symbols: number, imports: number, codeGraph: *}}
   *   Sizes of the in-memory indexes plus the analyzer's code graph.
   */
  getIndexStats() {
    return {
      files: this.fileIndex.size,
      symbols: this.symbolIndex.size,
      imports: this.importGraph.size,
      codeGraph: this.analyzer.getCodeGraph()
    };
  }
}

/**
 * Tells whether this module is the script Node was started with.
 *
 * The script path is resolved through symlinks and converted with
 * pathToFileURL so the comparison also holds on Windows, for paths that need
 * percent-encoding, and when launched through a symlinked bin entry.
 *
 * @returns {boolean}
 */
function isRunAsScript() {
  const scriptPath = process.argv[1];
  if (!scriptPath) {
    return false;
  }
  try {
    return import.meta.url === pathToFileURL(realpathSync(scriptPath)).href;
  } catch {
    // Script path could not be resolved; treat as "imported as a module".
    return false;
  }
}

// CLI support
if (isRunAsScript()) {
  const args = process.argv.slice(2);
  const options = {
    rootPath: process.cwd(),
    watch: false,
    entryPoints: []
  };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--watch':
      case '-w':
        options.watch = true;
        break;
      case '--entry':
      case '-e':
        // Consume the following argument as the entry point, if there is one.
        if (args[i + 1]) {
          options.entryPoints.push(args[++i]);
        }
        break;
      case '--help':
        console.log(`
Smart Indexer - AI-powered code understanding

Usage: smart-index [options] [folder]

Options:
  --watch, -w          Enable continuous file monitoring
  --entry, -e <file>   Specify entry point(s) for targeted indexing
  --help               Show this help message

Examples:
  smart-index                    # Index current directory
  smart-index --watch            # Index with file monitoring
  smart-index -e src/index.js    # Index from entry point
  smart-index -e src/app.js -e src/api.js --watch
`);
        process.exit(0);
      default:
        // A bare argument is the folder to index; unknown flags are ignored.
        if (!args[i].startsWith('-')) {
          options.rootPath = path.resolve(args[i]);
        }
    }
  }

  const indexer = new SmartIndexer(options);
  indexer.start().catch((error) => {
    console.error(error);
    // Report failure to the caller without cutting off pending output.
    process.exitCode = 1;
  });
}