// ai-index
// Version:
// AI-powered local code indexing and search system for any codebase
// 293 lines (239 loc) • 8.08 kB
// JavaScript
import { LocalIndex } from 'vectra';
import path from 'path';
import { homedir } from 'os';
import fs from 'fs/promises';
/**
 * File-backed vector store built on vectra's `LocalIndex`.
 *
 * Stores document chunks (embedding + metadata) and offers pure vector search
 * as well as a hybrid search that blends vector scores with a simple lexical
 * score over chunk content and repository path.
 */
export class LocalVectorStore {
  /**
   * @param {object} [config] - Optional settings; `INDEX_NAME` and `DATA_PATH` are read.
   * @param {string|null} [indexName] - Index name; takes precedence over `config.INDEX_NAME` when truthy.
   */
  constructor(config = {}, indexName = null) {
    this.indexName = indexName || config.INDEX_NAME || 'code_index';
    this.dataPath = this.expandPath(config.DATA_PATH || '~/.ai-index/data');
    // Location of the index actually in use. initialize() overwrites this,
    // which matters when it has to fall back to a project-local directory.
    this.indexPath = path.join(this.dataPath, this.indexName);
    this.index = null;
    this.dimensions = null;
  }

  /**
   * Expands a leading `~` (alone or followed by `/`) to the user's home directory.
   *
   * @param {string} filepath - Path that may start with `~`.
   * @returns {string} The expanded path, or `filepath` unchanged.
   */
  expandPath(filepath) {
    if (filepath === '~') {
      return homedir();
    }
    if (filepath.startsWith('~/')) {
      return path.join(homedir(), filepath.slice(2));
    }
    return filepath;
  }

  /**
   * Opens (or creates) the index under the configured data path. If that
   * fails for any reason, retries under `<cwd>/ai_index/vector_data`.
   *
   * @param {number} [dimensions=384] - Embedding dimensionality recorded on the store.
   * @returns {Promise<void>}
   * @throws Rethrows the fallback error when both locations fail.
   */
  async initialize(dimensions = 384) {
    this.dimensions = dimensions;
    try {
      await this.openIndexAt(this.dataPath, dimensions);
    } catch (primaryErr) {
      // Fallback to project-local storage if homedir path is not writable.
      // The primary error is logged rather than silently discarded.
      const fallbackBase = path.join(process.cwd(), 'ai_index', 'vector_data');
      console.warn(
        `⚠️ Could not open index under ${this.dataPath}; falling back to: ${fallbackBase}`,
        primaryErr
      );
      try {
        await this.openIndexAt(fallbackBase, dimensions);
      } catch (fallbackErr) {
        console.error('Failed to initialize vector store:', fallbackErr);
        throw fallbackErr;
      }
    }
  }

  /**
   * Helper for initialize(): ensures `basePath` exists, then opens or creates
   * the index inside it. `this.index` / `this.indexPath` are only assigned
   * once the index is usable, so a failed attempt leaves no half-open handle.
   *
   * @param {string} basePath - Directory that will contain the index folder.
   * @param {number} dimensions - Passed through to `createIndex`.
   * @returns {Promise<void>}
   */
  async openIndexAt(basePath, dimensions) {
    await fs.mkdir(basePath, { recursive: true });
    const indexPath = path.join(basePath, this.indexName);
    const index = new LocalIndex(indexPath);
    const exists = await index.isIndexCreated();
    if (!exists) {
      console.log(`📦 Creating new local index at: ${indexPath}`);
      // NOTE(review): confirm the installed vectra version honors these options.
      await index.createIndex({
        dimensions,
        metric: 'cosine',
        cacheSize: 1000
      });
    } else {
      console.log(`📂 Loading existing index from: ${indexPath}`);
    }
    this.index = index;
    this.indexPath = indexPath;
  }

  /**
   * Runs `work` between `beginUpdate()` and `endUpdate()`. If `work` or the
   * final commit throws, the pending update is cancelled (when the index
   * exposes `cancelUpdate`) so that a later `beginUpdate()` is not blocked by
   * a stale open update. The original error is rethrown.
   *
   * @param {() => Promise<void>} work - Mutations to perform inside the update.
   * @returns {Promise<void>}
   */
  async withUpdate(work) {
    // beginUpdate stays outside the try: if it fails, the update is not ours to cancel.
    await this.index.beginUpdate();
    try {
      await work();
      await this.index.endUpdate();
    } catch (err) {
      if (typeof this.index.cancelUpdate === 'function') {
        this.index.cancelUpdate();
      }
      throw err;
    }
  }

  /**
   * Upserts documents into the index.
   *
   * @param {Array<object>} documents - Each document supplies `embedding` and
   *   `content`, and optionally `id`, `repo_path`, `area`, `language`,
   *   `start_line`, `end_line`, `chunk_id`, `is_documentation` and `metadata`
   *   (extra keys that override the defaults above).
   * @returns {Promise<number>} Number of items written.
   * @throws {Error} If the store has not been initialized.
   */
  async addDocuments(documents) {
    if (!this.index) {
      throw new Error('Vector store not initialized');
    }
    if (documents.length === 0) {
      // Nothing to write; avoid opening and committing an empty update.
      return 0;
    }
    const items = documents.map(doc => ({
      id: doc.id || this.generateId(),
      vector: doc.embedding,
      metadata: {
        content: doc.content,
        repo_path: doc.repo_path || '',
        area: doc.area || '',
        language: doc.language || '',
        start_line: doc.start_line || 0,
        end_line: doc.end_line || 0,
        chunk_id: doc.chunk_id || 0,
        is_documentation: doc.is_documentation || false,
        ...doc.metadata
      }
    }));
    await this.withUpdate(async () => {
      for (const item of items) {
        await this.index.upsertItem(item);
      }
    });
    return items.length;
  }

  /**
   * Tests item metadata against the optional `area` / `language` filter.
   * Falsy filter fields (and a missing filter) match everything.
   *
   * @param {object} metadata - Item metadata.
   * @param {object} [filter] - May contain `area` and/or `language`.
   * @returns {boolean} True when the metadata satisfies the filter.
   */
  matchesFilter(metadata, filter) {
    if (!filter) {
      return true;
    }
    if (filter.area && metadata.area !== filter.area) {
      return false;
    }
    if (filter.language && metadata.language !== filter.language) {
      return false;
    }
    return true;
  }

  /**
   * Vector similarity search.
   *
   * Filters and the score threshold are applied AFTER the top-`k` query, so
   * fewer than `k` results may be returned.
   *
   * @param {number[]} queryEmbedding - Query vector.
   * @param {object} [options]
   * @param {number} [options.k=10] - Number of neighbours requested from the index.
   * @param {object} [options.filter={}] - Optional `area` / `language` equality filter.
   * @param {number} [options.scoreThreshold=0] - Minimum score; only applied when > 0.
   * @returns {Promise<Array<{id: string, score: number, metadata: object, content: string}>>}
   * @throws {Error} If the store has not been initialized.
   */
  async search(queryEmbedding, options = {}) {
    if (!this.index) {
      throw new Error('Vector store not initialized');
    }
    const {
      k = 10,
      filter = {},
      scoreThreshold = 0
    } = options;
    const results = await this.index.queryItems(queryEmbedding, k);
    let filtered = results.filter(r => this.matchesFilter(r.item.metadata, filter));
    if (scoreThreshold > 0) {
      filtered = filtered.filter(r => r.score >= scoreThreshold);
    }
    return filtered.map(result => ({
      id: result.item.id,
      score: result.score,
      metadata: result.item.metadata,
      content: result.item.metadata.content
    }));
  }

  /**
   * Hybrid search: merges vector results with a lexical scan of every item.
   *
   * Each candidate's `finalScore` is `vectorScore * vectorWeight` plus
   * `textScore * textWeight` (whichever parts exist). The `filter` is applied
   * to both the vector and the lexical candidates.
   *
   * @param {string} query - Raw text query.
   * @param {number[]} queryEmbedding - Embedding of the query.
   * @param {object} [options]
   * @param {number} [options.k=10] - Number of results to return.
   * @param {number} [options.textWeight=0.4] - Weight of the lexical score.
   * @param {number} [options.vectorWeight=0.6] - Weight of the vector score.
   * @param {object} [options.filter={}] - Optional `area` / `language` equality filter.
   * @returns {Promise<Array<object>>} Up to `k` results sorted by descending `finalScore`.
   * @throws {Error} If the store has not been initialized.
   */
  async hybridSearch(query, queryEmbedding, options = {}) {
    const {
      k = 10,
      textWeight = 0.4,
      vectorWeight = 0.6,
      filter = {}
    } = options;
    const vectorResults = await this.search(queryEmbedding, { k: k * 2, filter });
    const allItems = await this.index.listItems();
    // Lowercase once; it is the same for every item.
    const queryLower = (query || '').toLowerCase();

    // Simple lexical search
    const textResults = [];
    for (const item of allItems) {
      // Keep lexical candidates consistent with the filtered vector candidates.
      if (!this.matchesFilter(item.metadata, filter)) {
        continue;
      }
      const content = (item.metadata.content || '').toLowerCase();
      const repoPath = (item.metadata.repo_path || '').toLowerCase();
      const totalTextScore =
        this.calculateTextScore(content, queryLower) +
        this.calculatePathScore(repoPath, queryLower);
      if (totalTextScore > 0) {
        textResults.push({
          id: item.id,
          score: totalTextScore,
          metadata: item.metadata,
          content: item.metadata.content
        });
      }
    }
    textResults.sort((a, b) => b.score - a.score);

    // Combine vector and text results, keyed by item id.
    const combined = new Map();
    for (const result of vectorResults) {
      combined.set(result.id, {
        ...result,
        finalScore: result.score * vectorWeight
      });
    }
    for (const result of textResults.slice(0, k * 2)) {
      const existing = combined.get(result.id);
      if (existing) {
        existing.finalScore += result.score * textWeight;
      } else {
        combined.set(result.id, {
          ...result,
          finalScore: result.score * textWeight
        });
      }
    }
    return Array.from(combined.values())
      .sort((a, b) => b.finalScore - a.finalScore)
      .slice(0, k);
  }

  /**
   * Scores a repository path against the query terms: +1 for every term
   * (length >= 2) contained in the path, and +2 more when the path ends with
   * the term followed by `.js`, `.ts`, `.jsx` or `.tsx`.
   *
   * @param {string} path - Repository path (the caller lowercases it). Shadows
   *   the imported `path` module inside this method only.
   * @param {string} query - Query text (the caller lowercases it).
   * @returns {number} Path score, 0 when nothing matches.
   */
  calculatePathScore(path, query) {
    const boostedExtensions = ['.js', '.ts', '.jsx', '.tsx'];
    let score = 0;
    for (const term of query.split(/\s+/)) {
      if (term.length < 2) continue;
      if (path.includes(term)) {
        score += 1.0;
      }
      // Boost for filename matches
      if (boostedExtensions.some(ext => path.endsWith(term + ext))) {
        score += 2.0;
      }
    }
    return score;
  }

  /**
   * Scores content against the query: a bonus of `3 * termCount` when the
   * whole query appears verbatim, plus the case-insensitive occurrence count
   * of each term (length >= 2), all divided by the number of terms.
   *
   * Terms are matched literally; regex metacharacters in the query (for
   * example `c++` or `foo(`) are escaped instead of being compiled as a pattern.
   *
   * @param {string} content - Text to score.
   * @param {string} query - Query text.
   * @returns {number} Text score, 0 for empty input or no usable terms.
   */
  calculateTextScore(content, query) {
    if (!content || !query) return 0;
    const queryTerms = query.split(/\s+/).filter(term => term.length > 1);
    if (queryTerms.length === 0) return 0;
    let score = 0;
    // Exact phrase match gets highest score
    if (content.includes(query)) {
      score += queryTerms.length * 3;
    }
    // Individual term matches
    for (const term of queryTerms) {
      const literal = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const matches = (content.match(new RegExp(literal, 'gi')) || []).length;
      score += matches;
    }
    // Normalize by query length
    return score / queryTerms.length;
  }

  /**
   * Deletes every item whose `metadata.repo_path` equals `filePath`.
   *
   * @param {string} filePath - Repository path to remove.
   * @returns {Promise<number>} Number of items removed.
   * @throws {Error} If the store has not been initialized.
   */
  async removeDocumentsByFile(filePath) {
    if (!this.index) {
      throw new Error('Vector store not initialized');
    }
    const allItems = await this.index.listItems();
    const itemsToRemove = allItems.filter(item =>
      item.metadata.repo_path === filePath
    );
    if (itemsToRemove.length === 0) {
      return 0;
    }
    await this.withUpdate(async () => {
      for (const item of itemsToRemove) {
        await this.index.deleteItem(item.id);
      }
    });
    return itemsToRemove.length;
  }

  /**
   * Deletes the underlying index, if one has been opened.
   *
   * @returns {Promise<void>}
   */
  async deleteIndex() {
    if (this.index) {
      await this.index.deleteIndex();
      // Log the path actually in use, which may be the fallback location.
      console.log(`🗑️ Deleted index at: ${this.indexPath}`);
    }
  }

  /**
   * Reports basic statistics about the store.
   *
   * @returns {Promise<{documentCount: number, dimensions: number, indexPath?: string}>}
   *   Zeroed counts (without `indexPath`) when the store is not initialized.
   */
  async getStats() {
    if (!this.index) {
      return { documentCount: 0, dimensions: 0 };
    }
    const items = await this.index.listItems();
    return {
      documentCount: items.length,
      dimensions: this.dimensions,
      indexPath: this.indexPath
    };
  }

  /**
   * Builds an id of the form `doc_<timestamp>_<random base-36 suffix>`.
   * Not cryptographically secure.
   *
   * @returns {string}
   */
  generateId() {
    return `doc_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
  }
}
/**
 * Convenience factory: constructs a LocalVectorStore and initializes it
 * before handing it back.
 *
 * @param {object} [config] - Store configuration; `EMBED_DIM` selects the
 *   embedding dimensionality (384 when absent or falsy).
 * @param {string|null} [indexName] - Optional index name forwarded to the store.
 * @returns {Promise<LocalVectorStore>} The initialized store.
 */
export async function createLocalVectorStore(config = {}, indexName = null) {
  const vectorStore = new LocalVectorStore(config, indexName);
  await vectorStore.initialize(config.EMBED_DIM || 384);
  return vectorStore;
}