UNPKG

ai-index

Version:

AI-powered local code indexing and search system for any codebase

293 lines (239 loc) 8.08 kB
import { LocalIndex } from 'vectra';
import path from 'path';
import { homedir } from 'os';
import fs from 'fs/promises';

/**
 * Escape every regular-expression metacharacter in `text` so it can be
 * embedded in a RegExp and matched literally.
 *
 * @param {string} text - Raw text (e.g. a user-supplied query term).
 * @returns {string} The text with metacharacters backslash-escaped.
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Check stored metadata against the optional `area` / `language` filter.
 * A filter key that is missing or falsy is not applied.
 *
 * @param {object} metadata - Metadata object of a stored item.
 * @param {{area?: string, language?: string}} filter - Filter criteria.
 * @returns {boolean} True when the metadata satisfies every provided key.
 */
function matchesFilter(metadata, filter) {
  if (filter.area && metadata.area !== filter.area) {
    return false;
  }
  if (filter.language && metadata.language !== filter.language) {
    return false;
  }
  return true;
}

/**
 * Vector store backed by an on-disk `vectra` LocalIndex.
 *
 * The index lives under `config.DATA_PATH` (default `~/.ai-index/data`);
 * when that location cannot be used, `initialize` falls back to
 * `<cwd>/ai_index/vector_data`.
 */
export class LocalVectorStore {
  /**
   * @param {object} [config] - Reads `INDEX_NAME` and `DATA_PATH`.
   * @param {string|null} [indexName] - Overrides `config.INDEX_NAME`.
   */
  constructor(config = {}, indexName = null) {
    this.indexName = indexName || config.INDEX_NAME || 'code_index';
    this.dataPath = this.expandPath(config.DATA_PATH || '~/.ai-index/data');
    // Folder of the index actually in use; updated by initialize() when the
    // fallback location is chosen.
    this.indexPath = path.join(this.dataPath, this.indexName);
    this.index = null;
    this.dimensions = null;
  }

  /**
   * Expand a leading `~` (alone or followed by `/`) to the user's home
   * directory. Any other path is returned unchanged.
   *
   * @param {string} filepath
   * @returns {string}
   */
  expandPath(filepath) {
    if (filepath === '~') {
      return homedir();
    }
    if (filepath.startsWith('~/')) {
      return path.join(homedir(), filepath.slice(2));
    }
    return filepath;
  }

  /**
   * Open the index named `this.indexName` under `baseDir`, creating the
   * directory and the index when they do not exist yet. `this.index` and
   * `this.indexPath` are only assigned once everything succeeded, so a
   * failed attempt never leaves a half-opened index behind.
   *
   * @param {string} baseDir - Directory that contains the index folder.
   * @param {number} dimensions - Passed through to `createIndex`.
   * @returns {Promise<void>}
   */
  async openIndexAt(baseDir, dimensions) {
    await fs.mkdir(baseDir, { recursive: true });

    const indexPath = path.join(baseDir, this.indexName);
    const index = new LocalIndex(indexPath);

    if (await index.isIndexCreated()) {
      console.log(`📂 Loading existing index from: ${indexPath}`);
    } else {
      console.log(`📦 Creating new local index at: ${indexPath}`);
      await index.createIndex({
        dimensions,
        metric: 'cosine',
        cacheSize: 1000
      });
    }

    this.index = index;
    this.indexPath = indexPath;
  }

  /**
   * Load or create the index, first under `this.dataPath`, then under
   * `<cwd>/ai_index/vector_data` if the first location fails.
   *
   * @param {number} [dimensions=384] - Embedding dimensionality to record.
   * @returns {Promise<void>}
   * @throws The fallback error when both locations fail.
   */
  async initialize(dimensions = 384) {
    this.dimensions = dimensions;

    try {
      await this.openIndexAt(this.dataPath, dimensions);
      return;
    } catch (primaryError) {
      // Fallback to project-local storage if homedir path is not writable.
      // Report why, on stderr, instead of discarding the original failure.
      console.warn(
        `⚠️ Could not use index location ${this.dataPath} (${primaryError}); falling back to project-local storage`
      );
    }

    const fallbackBase = path.join(process.cwd(), 'ai_index', 'vector_data');
    try {
      await this.openIndexAt(fallbackBase, dimensions);
    } catch (fallbackErr) {
      console.error('Failed to initialize vector store:', fallbackErr);
      throw fallbackErr;
    }
  }

  /**
   * Run `work` between `beginUpdate()` and `endUpdate()`. If anything
   * throws, the pending update is cancelled so that a later `beginUpdate()`
   * is not rejected because of a stale, never-finished update.
   *
   * @param {() => Promise<void>} work - Mutations to apply.
   * @returns {Promise<void>}
   */
  async runInUpdate(work) {
    await this.index.beginUpdate();
    try {
      await work();
      await this.index.endUpdate();
    } catch (error) {
      this.index.cancelUpdate();
      throw error;
    }
  }

  /**
   * Upsert documents into the index.
   *
   * @param {Array<object>} documents - Each document provides `embedding`
   *   and `content`, and optionally `id`, `repo_path`, `area`, `language`,
   *   `start_line`, `end_line`, `chunk_id`, `is_documentation`, `metadata`.
   *   Keys in `doc.metadata` override the fields derived from the document.
   * @returns {Promise<number>} Number of documents written.
   * @throws {Error} When the store has not been initialized.
   */
  async addDocuments(documents) {
    if (!this.index) {
      throw new Error('Vector store not initialized');
    }

    const items = documents.map(doc => ({
      id: doc.id || this.generateId(),
      vector: doc.embedding,
      metadata: {
        content: doc.content,
        repo_path: doc.repo_path || '',
        area: doc.area || '',
        language: doc.language || '',
        start_line: doc.start_line || 0,
        end_line: doc.end_line || 0,
        chunk_id: doc.chunk_id || 0,
        is_documentation: doc.is_documentation || false,
        ...doc.metadata
      }
    }));

    // Nothing to write: skip the begin/end update round trip.
    if (items.length === 0) {
      return 0;
    }

    await this.runInUpdate(async () => {
      for (const item of items) {
        await this.index.upsertItem(item);
      }
    });

    return items.length;
  }

  /**
   * Query the index by embedding.
   *
   * The `area` / `language` filter and `scoreThreshold` are applied to the
   * `k` results returned by the index, so fewer than `k` hits may come back.
   *
   * @param {number[]} queryEmbedding
   * @param {{k?: number, filter?: object, scoreThreshold?: number}} [options]
   * @returns {Promise<Array<{id: string, score: number, metadata: object, content: string}>>}
   * @throws {Error} When the store has not been initialized.
   */
  async search(queryEmbedding, options = {}) {
    if (!this.index) {
      throw new Error('Vector store not initialized');
    }

    const { k = 10, filter = {}, scoreThreshold = 0 } = options;
    const results = await this.index.queryItems(queryEmbedding, k);

    let filtered = results.filter(r => matchesFilter(r.item.metadata, filter));
    if (scoreThreshold > 0) {
      filtered = filtered.filter(r => r.score >= scoreThreshold);
    }

    return filtered.map(result => ({
      id: result.item.id,
      score: result.score,
      metadata: result.item.metadata,
      content: result.item.metadata.content
    }));
  }

  /**
   * Combine vector similarity with a simple lexical score over content and
   * repository path.
   *
   * Up to `2 * k` vector hits and the `2 * k` best lexical hits are merged
   * by id; each contributes `score * weight` to `finalScore`, and the top
   * `k` by `finalScore` are returned. The `filter` is applied to both the
   * vector and the lexical candidates.
   *
   * @param {string} query - Free-text query.
   * @param {number[]} queryEmbedding - Embedding of the query.
   * @param {{k?: number, textWeight?: number, vectorWeight?: number, filter?: object}} [options]
   * @returns {Promise<Array<object>>} Results carrying `finalScore`.
   * @throws {Error} When the store has not been initialized.
   */
  async hybridSearch(query, queryEmbedding, options = {}) {
    const { k = 10, textWeight = 0.4, vectorWeight = 0.6, filter = {} } = options;

    const vectorResults = await this.search(queryEmbedding, { k: k * 2, filter });

    const allItems = await this.index.listItems();
    const queryLower = query.toLowerCase();
    const textResults = [];

    // Simple lexical search
    for (const item of allItems) {
      // Keep lexical hits consistent with the vector hits: same filter.
      if (!matchesFilter(item.metadata, filter)) {
        continue;
      }

      const content = (item.metadata.content || '').toLowerCase();
      const repoPath = (item.metadata.repo_path || '').toLowerCase();

      const contentScore = this.calculateTextScore(content, queryLower);
      const pathScore = this.calculatePathScore(repoPath, queryLower);
      const totalTextScore = contentScore + pathScore;

      if (totalTextScore > 0) {
        textResults.push({
          id: item.id,
          score: totalTextScore,
          metadata: item.metadata,
          content: item.metadata.content
        });
      }
    }

    textResults.sort((a, b) => b.score - a.score);

    // Combine vector and text results
    const combined = new Map();

    for (const result of vectorResults) {
      combined.set(result.id, {
        ...result,
        finalScore: result.score * vectorWeight
      });
    }

    for (const result of textResults.slice(0, k * 2)) {
      const existing = combined.get(result.id);
      if (existing) {
        existing.finalScore += result.score * textWeight;
      } else {
        combined.set(result.id, {
          ...result,
          finalScore: result.score * textWeight
        });
      }
    }

    return Array.from(combined.values())
      .sort((a, b) => b.finalScore - a.finalScore)
      .slice(0, k);
  }

  /**
   * Score a path against the query terms: +1 for every term (two or more
   * characters) contained in the path, +2 more when the path ends with
   * `<term>.js`, `.ts`, `.jsx` or `.tsx`.
   *
   * @param {string} repoPath - Path to score (callers pass it lowercased).
   * @param {string} query - Whitespace-separated query terms.
   * @returns {number}
   */
  calculatePathScore(repoPath, query) {
    const queryTerms = query.split(/\s+/);
    const extensions = ['.js', '.ts', '.jsx', '.tsx'];
    let score = 0;

    for (const term of queryTerms) {
      if (term.length < 2) continue;

      if (repoPath.includes(term)) {
        score += 1.0;
      }

      // Boost for exact filename matches
      if (extensions.some(ext => repoPath.endsWith(term + ext))) {
        score += 2.0;
      }
    }

    return score;
  }

  /**
   * Score content against the query: `3 * termCount` when the whole query
   * occurs verbatim, plus one point per case-insensitive occurrence of each
   * term (two or more characters), divided by the number of terms.
   *
   * Terms are matched literally: regex metacharacters in the query (common
   * in code searches, e.g. `foo(` or `c++`) are escaped rather than being
   * interpreted as a pattern.
   *
   * @param {string} content
   * @param {string} query
   * @returns {number} 0 when content or query is empty or has no usable term.
   */
  calculateTextScore(content, query) {
    if (!content || !query) return 0;

    const queryTerms = query.split(/\s+/).filter(term => term.length > 1);
    if (queryTerms.length === 0) return 0;

    let score = 0;

    // Exact phrase match gets highest score
    if (content.includes(query)) {
      score += queryTerms.length * 3;
    }

    // Individual term matches
    for (const term of queryTerms) {
      const matches = content.match(new RegExp(escapeRegExp(term), 'gi')) || [];
      score += matches.length;
    }

    // Normalize by query length
    return score / queryTerms.length;
  }

  /**
   * Delete every item whose `metadata.repo_path` equals `filePath`.
   *
   * @param {string} filePath
   * @returns {Promise<number>} Number of items removed.
   * @throws {Error} When the store has not been initialized.
   */
  async removeDocumentsByFile(filePath) {
    if (!this.index) {
      throw new Error('Vector store not initialized');
    }

    const allItems = await this.index.listItems();
    const itemsToRemove = allItems.filter(
      item => item.metadata.repo_path === filePath
    );

    if (itemsToRemove.length === 0) {
      return 0;
    }

    await this.runInUpdate(async () => {
      for (const item of itemsToRemove) {
        await this.index.deleteItem(item.id);
      }
    });

    return itemsToRemove.length;
  }

  /**
   * Delete the index through the underlying LocalIndex. Does nothing when
   * the store was never initialized.
   *
   * @returns {Promise<void>}
   */
  async deleteIndex() {
    if (this.index) {
      await this.index.deleteIndex();
      console.log(`🗑️ Deleted index at: ${this.indexPath}`);
    }
  }

  /**
   * @returns {Promise<{documentCount: number, dimensions: number, indexPath?: string}>}
   *   Item count, the dimensions given to `initialize`, and the folder of
   *   the index in use. Zeros (and no path) when not initialized.
   */
  async getStats() {
    if (!this.index) {
      return { documentCount: 0, dimensions: 0 };
    }

    const items = await this.index.listItems();
    return {
      documentCount: items.length,
      dimensions: this.dimensions,
      indexPath: this.indexPath
    };
  }

  /**
   * Build an id of the form `doc_<timestamp>_<random base-36 suffix>` for
   * documents that arrive without one.
   *
   * @returns {string}
   */
  generateId() {
    return `doc_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
  }
}

/**
 * Create a LocalVectorStore and initialize it.
 *
 * @param {object} [config] - Store configuration; `EMBED_DIM` sets the
 *   dimensions (default 384).
 * @param {string|null} [indexName] - Overrides `config.INDEX_NAME`.
 * @returns {Promise<LocalVectorStore>} The initialized store.
 */
export async function createLocalVectorStore(config = {}, indexName = null) {
  const store = new LocalVectorStore(config, indexName);
  const dimensions = config.EMBED_DIM || 384;
  await store.initialize(dimensions);
  return store;
}