@boundless-oss/atlas
Version:
Atlas - MCP Server for comprehensive startup project management
191 lines • 7.25 kB
JavaScript
import { promises as fs } from 'fs';
import path from 'path';
/**
 * Vector store that keeps chunks and their embeddings in process memory.
 *
 * The embedding model passed to the constructor is used through four members:
 * `embedSingle(text)`, `embed(texts)`, `cosineSimilarity(a, b)` and `dimension`.
 */
export class InMemoryVectorStore {
    /** Embedding model used to embed chunks/queries and to score similarity. */
    embeddingModel;
    /** chunk id -> chunk object */
    chunks = new Map();
    /** chunk id -> embedding vector */
    embeddings = new Map();
    /** document id -> Set of chunk ids belonging to that document */
    documentChunks = new Map();

    /**
     * @param {object} embeddingModel - Model providing `embedSingle`, `embed`,
     *   `cosineSimilarity` and `dimension`.
     */
    constructor(embeddingModel) {
        this.embeddingModel = embeddingModel;
    }

    /**
     * Adds (or replaces) a single chunk, embedding its content when the chunk
     * does not already carry an embedding.
     *
     * @param {object} chunk - Chunk with `id`, `documentId`, `content` and an
     *   optional `embedding`.
     * @returns {Promise<void>}
     */
    async addChunk(chunk) {
        let embedding = chunk.embedding;
        if (!embedding) {
            embedding = await this.embeddingModel.embedSingle(chunk.content);
        }
        this.#store(chunk, embedding);
    }

    /**
     * Adds (or replaces) several chunks, embedding all chunks that lack an
     * embedding with one batched `embed` call.
     *
     * @param {object[]} chunks - Chunks to add.
     * @returns {Promise<void>}
     * @throws {Error} If the model returns a different number of embeddings
     *   than texts; nothing is stored in that case.
     */
    async addChunks(chunks) {
        if (chunks.length === 0) {
            return;
        }
        const chunksWithEmbedding = [];
        const chunksNeedingEmbedding = [];
        for (const chunk of chunks) {
            (chunk.embedding ? chunksWithEmbedding : chunksNeedingEmbedding).push(chunk);
        }

        let generatedEmbeddings = [];
        if (chunksNeedingEmbedding.length > 0) {
            const texts = chunksNeedingEmbedding.map((chunk) => chunk.content);
            generatedEmbeddings = await this.embeddingModel.embed(texts);
            // A short result would otherwise store `undefined` embeddings and
            // make the affected chunks silently unsearchable.
            if (!generatedEmbeddings || generatedEmbeddings.length !== texts.length) {
                throw new Error(`Embedding model returned ${generatedEmbeddings?.length ?? 0} embeddings for ${texts.length} chunks`);
            }
        }

        // Pre-embedded chunks are stored first, then the freshly embedded ones.
        for (const chunk of chunksWithEmbedding) {
            this.#store(chunk, chunk.embedding);
        }
        chunksNeedingEmbedding.forEach((chunk, index) => {
            this.#store(chunk, generatedEmbeddings[index]);
        });
    }

    /**
     * Returns the `k` chunks most similar to `query`, best match first.
     *
     * @param {string} query - Text to embed and compare against stored chunks.
     * @param {number} k - Maximum number of results; zero or negative yields `[]`.
     * @returns {Promise<Array<{chunk: object, score: number, document: {id: *, path: *}}>>}
     */
    async search(query, k) {
        return this.#rank(query, k, () => true);
    }

    /**
     * Like `search`, but only considers chunks whose `metadata` strictly equals
     * every key/value pair in `filters`.
     *
     * @param {string} query - Text to embed and compare against stored chunks.
     * @param {number} k - Maximum number of results; zero or negative yields `[]`.
     * @param {object} [filters] - Required metadata values; null/undefined
     *   means "no filtering".
     * @returns {Promise<Array<{chunk: object, score: number, document: {id: *, path: *}}>>}
     */
    async searchWithFilters(query, k, filters) {
        const required = Object.entries(filters ?? {});
        // `?.` lets a chunk without metadata fail the filter instead of throwing.
        return this.#rank(query, k, (chunk) => required.every(([key, value]) => chunk.metadata?.[key] === value));
    }

    /**
     * Removes every chunk (and embedding) tracked for `documentId`.
     * Unknown document ids are ignored.
     *
     * @param {*} documentId - Document whose chunks should be dropped.
     * @returns {Promise<void>}
     */
    async removeDocument(documentId) {
        const chunkIds = this.documentChunks.get(documentId);
        if (!chunkIds) {
            return;
        }
        for (const chunkId of chunkIds) {
            this.chunks.delete(chunkId);
            this.embeddings.delete(chunkId);
        }
        this.documentChunks.delete(documentId);
    }

    /**
     * Empties the store.
     *
     * @returns {Promise<void>}
     */
    async clear() {
        this.chunks.clear();
        this.embeddings.clear();
        this.documentChunks.clear();
    }

    /**
     * @returns {number} Number of stored chunks.
     */
    size() {
        return this.chunks.size;
    }

    /**
     * Reports chunk/document counts, the model's embedding dimension and a
     * rough memory estimate.
     *
     * @returns {{totalChunks: number, totalDocuments: number, embeddingDimension: number, memoryUsage: number}}
     */
    getStats() {
        const totalChunks = this.chunks.size;
        const totalDocuments = this.documentChunks.size;
        const embeddingDimension = this.embeddingModel.dimension;
        // Rough estimate: a flat 1000 bytes per chunk plus 4 bytes per float.
        const chunkMemory = totalChunks * 1000;
        const embeddingMemory = totalChunks * embeddingDimension * 4;
        return {
            totalChunks,
            totalDocuments,
            embeddingDimension,
            memoryUsage: chunkMemory + embeddingMemory
        };
    }

    /**
     * Serializes the store to `filePath` as JSON, creating parent directories
     * as needed.
     *
     * @param {string} filePath - Destination file.
     * @returns {Promise<void>}
     */
    async save(filePath) {
        const data = {
            chunks: Array.from(this.chunks.entries()).map(([id, chunk]) => ({
                id,
                chunk,
                // Typed arrays do not serialize as JSON arrays; copy to a plain array.
                embedding: Array.from(this.embeddings.get(id) || [])
            })),
            documentChunks: Array.from(this.documentChunks.entries()).map(([docId, chunkIds]) => ({
                docId,
                chunkIds: Array.from(chunkIds)
            }))
        };
        await fs.mkdir(path.dirname(filePath), { recursive: true });
        await fs.writeFile(filePath, JSON.stringify(data, null, 2));
    }

    /**
     * Replaces the store's contents with the data written by `save`.
     * The file is fully parsed and staged before the current contents are
     * cleared, so a malformed file leaves the store untouched.
     *
     * @param {string} filePath - File previously produced by `save`.
     * @returns {Promise<void>}
     * @throws {TypeError} If the file does not contain `chunks` and
     *   `documentChunks` arrays.
     */
    async load(filePath) {
        const content = await fs.readFile(filePath, 'utf-8');
        const data = JSON.parse(content);
        if (!Array.isArray(data?.chunks) || !Array.isArray(data?.documentChunks)) {
            throw new TypeError(`Invalid vector store file: ${filePath}`);
        }

        // Stage everything first; any malformed entry throws before we clear.
        const stagedChunks = data.chunks.map((item) => [item.id, item.chunk, new Float32Array(item.embedding)]);
        const stagedDocuments = data.documentChunks.map((item) => [item.docId, new Set(item.chunkIds)]);

        await this.clear();
        for (const [id, chunk, embedding] of stagedChunks) {
            this.chunks.set(id, chunk);
            this.embeddings.set(id, embedding);
        }
        for (const [docId, chunkIds] of stagedDocuments) {
            this.documentChunks.set(docId, chunkIds);
        }
    }

    /**
     * Stores one chunk with its embedding and keeps the document index in sync.
     * If the chunk id was previously stored under another document, the id is
     * removed from that document so `removeDocument` on the old document cannot
     * delete the re-homed chunk.
     */
    #store(chunk, embedding) {
        const previous = this.chunks.get(chunk.id);
        if (previous && previous.documentId !== chunk.documentId) {
            const staleIds = this.documentChunks.get(previous.documentId);
            if (staleIds) {
                staleIds.delete(chunk.id);
                if (staleIds.size === 0) {
                    this.documentChunks.delete(previous.documentId);
                }
            }
        }
        this.chunks.set(chunk.id, chunk);
        this.embeddings.set(chunk.id, embedding);
        let chunkIds = this.documentChunks.get(chunk.documentId);
        if (!chunkIds) {
            chunkIds = new Set();
            this.documentChunks.set(chunk.documentId, chunkIds);
        }
        chunkIds.add(chunk.id);
    }

    /**
     * Shared ranking routine: scores every chunk accepted by `accepts` against
     * the query embedding and returns the top `k`, highest score first.
     */
    async #rank(query, k, accepts) {
        if (this.chunks.size === 0) {
            return [];
        }
        // slice(0, negative) would return "all but the last |k|"; treat as empty.
        if (typeof k === 'number' && k <= 0) {
            return [];
        }
        const queryEmbedding = await this.embeddingModel.embedSingle(query);
        const results = [];
        for (const [chunkId, chunk] of this.chunks) {
            if (!accepts(chunk)) {
                continue;
            }
            const chunkEmbedding = this.embeddings.get(chunkId);
            if (!chunkEmbedding) {
                continue;
            }
            results.push({
                chunk,
                score: this.embeddingModel.cosineSimilarity(queryEmbedding, chunkEmbedding),
                document: {
                    id: chunk.documentId,
                    path: chunk.documentId // Using documentId as path for now
                }
            });
        }
        results.sort((a, b) => b.score - a.score);
        return results.slice(0, k);
    }
}
//# sourceMappingURL=vector-store.js.map