UNPKG

@boundless-oss/atlas

Version:

Atlas - MCP Server for comprehensive startup project management

191 lines 7.25 kB
import { promises as fs } from 'fs';
import path from 'path';

/**
 * Vector store that keeps every chunk and its embedding in process memory.
 *
 * Three maps are maintained:
 *  - `chunks`:         chunk id    -> chunk object
 *  - `embeddings`:     chunk id    -> embedding vector
 *  - `documentChunks`: document id -> Set of chunk ids belonging to it
 *
 * The embedding model passed to the constructor is used through
 * `embedSingle(text)`, `embed(texts)`, `cosineSimilarity(a, b)` and the
 * `dimension` property.
 */
export class InMemoryVectorStore {
  embeddingModel;
  chunks = new Map();
  embeddings = new Map();
  documentChunks = new Map();

  /**
   * @param {object} embeddingModel - Model used to embed text and score similarity.
   */
  constructor(embeddingModel) {
    this.embeddingModel = embeddingModel;
  }

  /**
   * Adds (or replaces) one chunk, generating its embedding when the chunk
   * does not already carry one.
   *
   * @param {object} chunk - Needs `id`, `documentId` and `content`; `embedding` is optional.
   * @returns {Promise<void>}
   */
  async addChunk(chunk) {
    let embedding = chunk.embedding;
    if (!embedding) {
      embedding = await this.embeddingModel.embedSingle(chunk.content);
    }
    this.#storeChunk(chunk, embedding);
  }

  /**
   * Adds (or replaces) many chunks. Chunks without an embedding are embedded
   * in a single batch call to the model.
   *
   * Nothing is stored until the batch call has succeeded, so a failing model
   * leaves the store untouched.
   *
   * @param {object[]} chunks - Chunks to add.
   * @returns {Promise<void>}
   * @throws {Error} If the model returns a different number of embeddings
   *   than the number of texts it was given.
   */
  async addChunks(chunks) {
    if (chunks.length === 0) {
      return;
    }

    const chunksWithEmbedding = [];
    const chunksNeedingEmbedding = [];
    for (const chunk of chunks) {
      (chunk.embedding ? chunksWithEmbedding : chunksNeedingEmbedding).push(chunk);
    }

    let generatedEmbeddings = [];
    if (chunksNeedingEmbedding.length > 0) {
      const texts = chunksNeedingEmbedding.map((chunk) => chunk.content);
      generatedEmbeddings = await this.embeddingModel.embed(texts);
      // A short batch would otherwise store chunks with `undefined`
      // embeddings, which search silently skips.
      if (generatedEmbeddings?.length !== chunksNeedingEmbedding.length) {
        throw new Error(
          `Embedding model returned ${generatedEmbeddings?.length} embeddings for ${chunksNeedingEmbedding.length} chunks`,
        );
      }
    }

    // Pre-embedded chunks go first, then the freshly embedded ones; this
    // fixes the Map insertion order and therefore tie ordering in search.
    for (const chunk of chunksWithEmbedding) {
      this.#storeChunk(chunk, chunk.embedding);
    }
    chunksNeedingEmbedding.forEach((chunk, index) => {
      this.#storeChunk(chunk, generatedEmbeddings[index]);
    });
  }

  /**
   * Returns the `k` chunks most similar to `query`, best match first.
   *
   * @param {string} query - Text to embed and compare against stored chunks.
   * @param {number} [k] - Maximum number of results; omitted means "all",
   *   zero or negative means "none".
   * @returns {Promise<Array<{chunk: object, score: number, document: {id: *, path: *}}>>}
   */
  async search(query, k) {
    if (this.chunks.size === 0) {
      return [];
    }
    const queryEmbedding = await this.embeddingModel.embedSingle(query);
    return this.#rank(queryEmbedding, k, () => true);
  }

  /**
   * Like `search`, but only considers chunks whose `metadata` strictly equals
   * every key/value pair in `filters`. Chunks without metadata never match a
   * non-empty filter; a missing `filters` object matches everything.
   *
   * @param {string} query - Text to embed and compare against stored chunks.
   * @param {number} [k] - Maximum number of results (see `search`).
   * @param {object} [filters] - Exact-match metadata constraints.
   * @returns {Promise<Array<{chunk: object, score: number, document: {id: *, path: *}}>>}
   */
  async searchWithFilters(query, k, filters) {
    if (this.chunks.size === 0) {
      return [];
    }
    const queryEmbedding = await this.embeddingModel.embedSingle(query);
    const wanted = Object.entries(filters ?? {});
    const matchesFilters = (chunk) =>
      wanted.every(([key, value]) => chunk.metadata?.[key] === value);
    return this.#rank(queryEmbedding, k, matchesFilters);
  }

  /**
   * Removes every chunk (and embedding) tracked for `documentId`.
   * Unknown document ids are ignored.
   *
   * @param {*} documentId - Id of the document to drop.
   * @returns {Promise<void>}
   */
  async removeDocument(documentId) {
    const chunkIds = this.documentChunks.get(documentId);
    if (!chunkIds) {
      return;
    }
    for (const chunkId of chunkIds) {
      this.chunks.delete(chunkId);
      this.embeddings.delete(chunkId);
    }
    this.documentChunks.delete(documentId);
  }

  /**
   * Empties the store.
   *
   * @returns {Promise<void>}
   */
  async clear() {
    this.chunks.clear();
    this.embeddings.clear();
    this.documentChunks.clear();
  }

  /**
   * @returns {number} Number of stored chunks.
   */
  size() {
    return this.chunks.size;
  }

  /**
   * Summarizes the store's contents.
   *
   * `memoryUsage` is a rough estimate only: a flat 1000 bytes per chunk plus
   * 4 bytes per embedding component.
   *
   * @returns {{totalChunks: number, totalDocuments: number, embeddingDimension: number, memoryUsage: number}}
   */
  getStats() {
    const totalChunks = this.chunks.size;
    const totalDocuments = this.documentChunks.size;
    const embeddingDimension = this.embeddingModel.dimension;
    const chunkMemory = totalChunks * 1000;
    const embeddingMemory = totalChunks * embeddingDimension * 4;
    return {
      totalChunks,
      totalDocuments,
      embeddingDimension,
      memoryUsage: chunkMemory + embeddingMemory,
    };
  }

  /**
   * Writes the store to `filePath` as pretty-printed JSON, creating parent
   * directories as needed. Embeddings are written as plain number arrays.
   *
   * @param {string} filePath - Destination file.
   * @returns {Promise<void>}
   */
  async save(filePath) {
    const data = {
      chunks: Array.from(this.chunks.entries(), ([id, chunk]) => ({
        id,
        chunk,
        embedding: Array.from(this.embeddings.get(id) ?? []),
      })),
      documentChunks: Array.from(this.documentChunks.entries(), ([docId, chunkIds]) => ({
        docId,
        chunkIds: Array.from(chunkIds),
      })),
    };
    await fs.mkdir(path.dirname(filePath), { recursive: true });
    await fs.writeFile(filePath, JSON.stringify(data, null, 2));
  }

  /**
   * Replaces the store's contents with those saved at `filePath`.
   * Embeddings are restored as `Float32Array`s.
   *
   * The file is parsed and decoded completely before the current contents are
   * cleared, so an unreadable or malformed file leaves the store unchanged.
   *
   * @param {string} filePath - File previously produced by `save`.
   * @returns {Promise<void>}
   * @throws {TypeError} If the file does not contain `chunks` and
   *   `documentChunks` arrays.
   */
  async load(filePath) {
    const content = await fs.readFile(filePath, 'utf-8');
    const data = JSON.parse(content);
    if (!Array.isArray(data?.chunks) || !Array.isArray(data?.documentChunks)) {
      throw new TypeError(`Invalid vector store file: ${filePath}`);
    }

    // Decode everything up front; any failure here happens before clearing.
    const restoredChunks = data.chunks.map((item) => ({
      id: item.id,
      chunk: item.chunk,
      embedding: new Float32Array(item.embedding),
    }));
    const restoredDocuments = data.documentChunks.map((item) => ({
      docId: item.docId,
      chunkIds: new Set(item.chunkIds),
    }));

    await this.clear();
    for (const { id, chunk, embedding } of restoredChunks) {
      this.chunks.set(id, chunk);
      this.embeddings.set(id, embedding);
    }
    for (const { docId, chunkIds } of restoredDocuments) {
      this.documentChunks.set(docId, chunkIds);
    }
  }

  /**
   * Stores a chunk with its embedding and links it to its document.
   * If the id was previously stored under another document, the old link is
   * removed so that removing the old document cannot delete this chunk.
   */
  #storeChunk(chunk, embedding) {
    const previous = this.chunks.get(chunk.id);
    if (previous !== undefined && previous.documentId !== chunk.documentId) {
      this.#unlinkChunk(previous.documentId, chunk.id);
    }

    this.chunks.set(chunk.id, chunk);
    this.embeddings.set(chunk.id, embedding);

    let chunkIds = this.documentChunks.get(chunk.documentId);
    if (!chunkIds) {
      chunkIds = new Set();
      this.documentChunks.set(chunk.documentId, chunkIds);
    }
    chunkIds.add(chunk.id);
  }

  /** Drops one chunk id from a document's set, removing the set once empty. */
  #unlinkChunk(documentId, chunkId) {
    const chunkIds = this.documentChunks.get(documentId);
    if (!chunkIds) {
      return;
    }
    chunkIds.delete(chunkId);
    if (chunkIds.size === 0) {
      this.documentChunks.delete(documentId);
    }
  }

  /** Scores every accepted chunk against the query and returns the top `k`. */
  #rank(queryEmbedding, k, accepts) {
    const results = [];
    for (const [chunkId, chunk] of this.chunks) {
      if (!accepts(chunk)) {
        continue;
      }
      const chunkEmbedding = this.embeddings.get(chunkId);
      if (!chunkEmbedding) {
        continue;
      }
      const score = this.embeddingModel.cosineSimilarity(queryEmbedding, chunkEmbedding);
      results.push({
        chunk,
        score,
        document: {
          id: chunk.documentId,
          path: chunk.documentId, // Using documentId as path for now
        },
      });
    }

    results.sort((a, b) => b.score - a.score);
    if (k === undefined) {
      return results;
    }
    // Clamp so a negative k yields nothing instead of "all but the last |k|".
    return results.slice(0, Math.max(0, k));
  }
}
//# sourceMappingURL=vector-store.js.map