/*
 * @boundless-oss/atlas
 * Version: (not listed)
 * Atlas - MCP Server for comprehensive startup project management
 * 499 lines (439 loc) • 13.3 kB
 * text/typescript
 */
import { rm } from 'node:fs/promises';
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { InMemoryVectorStore } from '../vector-store.js';
import { LocalEmbeddingModel } from '../embeddings.js';
import type { RAGChunk, RAGSearchResult } from '../types.js';
describe('InMemoryVectorStore', () => {
// Shared fixtures. Both are rebuilt before every test so that chunks added
// (or spies installed on the model) in one test are not visible in the next.
let embeddingModel: LocalEmbeddingModel;
let store: InMemoryVectorStore;

beforeEach(() => {
  // The store is constructed on top of the freshly created model.
  embeddingModel = new LocalEmbeddingModel();
  store = new InMemoryVectorStore(embeddingModel);
});
describe('initialization', () => {
  // A store that has had nothing added to it must report zero chunks.
  it('should initialize with empty store', () => {
    const chunkCount = store.size();

    expect(chunkCount).toBe(0);
  });

  // Constructing the store with an embedding model yields a defined,
  // still-empty instance.
  it('should initialize with provided embedding model', () => {
    expect(store).toBeDefined();

    const chunkCount = store.size();
    expect(chunkCount).toBe(0);
  });
});
describe('addChunk', () => {
  // Adding one chunk grows the store from 0 to 1.
  it('should add a single chunk with embedding', async () => {
    const single: RAGChunk = {
      id: 'chunk-1',
      documentId: 'doc-1',
      content: 'Test content',
      index: 0,
      metadata: {
        startOffset: 0,
        endOffset: 12,
        type: 'text'
      }
    };

    await store.addChunk(single);

    expect(store.size()).toBe(1);
  });

  // A chunk without an `embedding` field is expected to trigger a call to
  // the model's `embedSingle` with the chunk's text.
  it('should generate embedding if not provided', async () => {
    const embedSingleSpy = vi.spyOn(embeddingModel, 'embedSingle');
    const withoutEmbedding: RAGChunk = {
      id: 'chunk-1',
      documentId: 'doc-1',
      content: 'Test content',
      index: 0,
      metadata: {
        startOffset: 0,
        endOffset: 12,
        type: 'text'
      }
    };

    await store.addChunk(withoutEmbedding);

    expect(embedSingleSpy).toHaveBeenCalledWith('Test content');
    expect(store.size()).toBe(1);
  });

  // A chunk that already carries an embedding must not reach the model.
  it('should use provided embedding', async () => {
    const precomputed = new Float32Array(384).fill(0.5);
    const withEmbedding: RAGChunk = {
      id: 'chunk-1',
      documentId: 'doc-1',
      content: 'Test content',
      startOffset: 0,
      endOffset: 12,
      index: 0,
      metadata: {
        startOffset: 0,
        endOffset: 10,
        type: 'text'
      },
      embedding: precomputed
    };
    const embedSingleSpy = vi.spyOn(embeddingModel, 'embedSingle');

    await store.addChunk(withEmbedding);

    expect(embedSingleSpy).not.toHaveBeenCalled();
    expect(store.size()).toBe(1);
  });

  // Two chunks sharing the id 'chunk-1' are added one after the other; the
  // store is expected to end up holding a single entry.
  it('should handle duplicate chunk IDs', async () => {
    const first: RAGChunk = {
      id: 'chunk-1',
      documentId: 'doc-1',
      content: 'Test content 1',
      startOffset: 0,
      endOffset: 14,
      index: 0,
      metadata: {
        startOffset: 0,
        endOffset: 10,
        type: 'text'
      }
    };
    const second: RAGChunk = {
      id: 'chunk-1',
      documentId: 'doc-1',
      content: 'Test content 2',
      startOffset: 0,
      endOffset: 14,
      index: 0,
      metadata: {
        startOffset: 0,
        endOffset: 10,
        type: 'text'
      }
    };

    await store.addChunk(first);
    await store.addChunk(second);

    expect(store.size()).toBe(1);
  });
});
describe('addChunks', () => {
  // Two distinct chunks passed in one call both end up in the store.
  it('should add multiple chunks', async () => {
    const pair: RAGChunk[] = [
      {
        id: 'chunk-1',
        documentId: 'doc-1',
        content: 'Test content 1',
        startOffset: 0,
        endOffset: 14,
        index: 0,
        metadata: {
          startOffset: 0,
          endOffset: 10,
          type: 'text'
        }
      },
      {
        id: 'chunk-2',
        documentId: 'doc-1',
        content: 'Test content 2',
        startOffset: 15,
        endOffset: 29,
        index: 0,
        metadata: {
          startOffset: 0,
          endOffset: 10,
          type: 'text'
        }
      }
    ];

    await store.addChunks(pair);

    expect(store.size()).toBe(2);
  });

  // Ten chunks are expected to produce exactly one `embed` call that
  // receives every chunk's text, in order.
  it('should batch embed chunks for efficiency', async () => {
    const embedSpy = vi.spyOn(embeddingModel, 'embed');
    const batch: RAGChunk[] = Array.from({ length: 10 }, (_, n) => ({
      id: `chunk-${n}`,
      documentId: 'doc-1',
      content: `Test content ${n}`,
      startOffset: n * 15,
      endOffset: (n + 1) * 15,
      index: 0,
      metadata: {
        startOffset: 0,
        endOffset: 10,
        type: 'text'
      }
    }));

    await store.addChunks(batch);

    expect(embedSpy).toHaveBeenCalledTimes(1);
    expect(embedSpy).toHaveBeenCalledWith(batch.map(entry => entry.content));
    expect(store.size()).toBe(10);
  });
});
describe('search', () => {
  // Every search test starts from the same three-chunk corpus: one sentence
  // about animals and two about technology, spread over two documents.
  beforeEach(async () => {
    const corpus: RAGChunk[] = [
      {
        id: 'chunk-1',
        documentId: 'doc-1',
        content: 'The quick brown fox jumps over the lazy dog',
        startOffset: 0,
        endOffset: 44,
        metadata: { topic: 'animals' }
      },
      {
        id: 'chunk-2',
        documentId: 'doc-1',
        content: 'Machine learning is a subset of artificial intelligence',
        startOffset: 45,
        endOffset: 100,
        metadata: { topic: 'technology' }
      },
      {
        id: 'chunk-3',
        documentId: 'doc-2',
        content: 'Deep learning uses neural networks with multiple layers',
        startOffset: 0,
        endOffset: 55,
        metadata: { topic: 'technology' }
      }
    ];

    await store.addChunks(corpus);
  });

  // The chunk that literally mentions the query should come first, with a
  // score in the range (0, 1].
  it('should find relevant chunks', async () => {
    const hits = await store.search('artificial intelligence', 2);

    expect(hits).toHaveLength(2);
    const best = hits[0];
    expect(best.chunk.id).toBe('chunk-2');
    expect(best.score).toBeGreaterThan(0);
    expect(best.score).toBeLessThanOrEqual(1);
  });

  // Asking for one result returns exactly one.
  it('should respect k parameter', async () => {
    const hits = await store.search('technology', 1);

    expect(hits).toHaveLength(1);
    expect(hits[0].score).toBeGreaterThan(0);
  });

  // The store is emptied first, so there is nothing left to match.
  it('should return empty array for no matches', async () => {
    await store.clear();

    const hits = await store.search('quantum physics', 5);

    expect(hits).toHaveLength(0);
  });

  // Scores must be non-increasing from the first result to the last.
  it('should rank results by similarity', async () => {
    const hits = await store.search('machine learning AI', 3);

    expect(hits).toHaveLength(3);
    for (let pos = 1; pos < hits.length; pos += 1) {
      expect(hits[pos - 1].score).toBeGreaterThanOrEqual(hits[pos].score);
    }
  });

  // Each result exposes the chunk (id, content, metadata) and its score.
  it('should include all chunk information in results', async () => {
    const hits = await store.search('fox', 1);
    const hit = hits[0];

    expect(hit).toHaveProperty('chunk');
    expect(hit).toHaveProperty('score');
    expect(hit.chunk).toHaveProperty('id');
    expect(hit.chunk).toHaveProperty('content');
    expect(hit.chunk).toHaveProperty('metadata');
  });
});
describe('searchWithFilters', () => {
  // Corpus: two Python chunks (beginner, advanced) and one beginner
  // JavaScript chunk, distinguished only through their metadata.
  beforeEach(async () => {
    const corpus: RAGChunk[] = [
      {
        id: 'chunk-1',
        documentId: 'doc-1',
        content: 'Python programming basics',
        startOffset: 0,
        endOffset: 25,
        metadata: { language: 'python', level: 'beginner' }
      },
      {
        id: 'chunk-2',
        documentId: 'doc-1',
        content: 'Advanced Python techniques',
        startOffset: 26,
        endOffset: 52,
        metadata: { language: 'python', level: 'advanced' }
      },
      {
        id: 'chunk-3',
        documentId: 'doc-2',
        content: 'JavaScript programming basics',
        startOffset: 0,
        endOffset: 29,
        metadata: { language: 'javascript', level: 'beginner' }
      }
    ];

    await store.addChunks(corpus);
  });

  // A single-field filter keeps only the chunks whose metadata matches it.
  it('should filter by metadata', async () => {
    const hits = await store.searchWithFilters('programming', 5, {
      language: 'python'
    });

    expect(hits).toHaveLength(2);
    const allPython = hits.every(hit => hit.chunk.metadata.language === 'python');
    expect(allPython).toBe(true);
  });

  // With two filter fields, only the chunk matching both is returned.
  it('should filter by multiple metadata fields', async () => {
    const hits = await store.searchWithFilters('programming', 5, {
      language: 'python',
      level: 'beginner'
    });

    expect(hits).toHaveLength(1);
    expect(hits[0].chunk.id).toBe('chunk-1');
  });

  // No chunk is tagged 'rust', so the filtered result is empty.
  it('should return empty array if no matches after filtering', async () => {
    const hits = await store.searchWithFilters('programming', 5, {
      language: 'rust'
    });

    expect(hits).toHaveLength(0);
  });
});
describe('removeDocument', () => {
  // Two chunks belong to 'doc-1' and one to 'doc-2'; removing 'doc-1' must
  // leave only the 'doc-2' chunk, both in the count and in search results.
  it('should remove all chunks from a document', async () => {
    const seeded: RAGChunk[] = [
      {
        id: 'chunk-1',
        documentId: 'doc-1',
        content: 'Content 1',
        startOffset: 0,
        endOffset: 9,
        index: 0,
        metadata: {
          startOffset: 0,
          endOffset: 10,
          type: 'text'
        }
      },
      {
        id: 'chunk-2',
        documentId: 'doc-1',
        content: 'Content 2',
        startOffset: 10,
        endOffset: 19,
        index: 0,
        metadata: {
          startOffset: 0,
          endOffset: 10,
          type: 'text'
        }
      },
      {
        id: 'chunk-3',
        documentId: 'doc-2',
        content: 'Content 3',
        startOffset: 0,
        endOffset: 9,
        index: 0,
        metadata: {
          startOffset: 0,
          endOffset: 10,
          type: 'text'
        }
      }
    ];

    await store.addChunks(seeded);
    expect(store.size()).toBe(3);

    await store.removeDocument('doc-1');
    expect(store.size()).toBe(1);

    const survivors = await store.search('Content', 10);
    expect(survivors).toHaveLength(1);
    expect(survivors[0].chunk.documentId).toBe('doc-2');
  });

  // Removing an unknown document id on an empty store leaves it empty.
  it('should handle removing non-existent document', async () => {
    await store.removeDocument('non-existent');

    const remaining = store.size();
    expect(remaining).toBe(0);
  });
});
describe('clear', () => {
  // After clear() the store reports zero chunks and searches return nothing.
  it('should remove all chunks', async () => {
    const seeded: RAGChunk[] = Array.from({ length: 5 }, (_, n) => ({
      id: `chunk-${n}`,
      documentId: 'doc-1',
      content: `Content ${n}`,
      startOffset: n * 10,
      endOffset: (n + 1) * 10,
      index: 0,
      metadata: {
        startOffset: 0,
        endOffset: 10,
        type: 'text'
      }
    }));

    await store.addChunks(seeded);
    expect(store.size()).toBe(5);

    await store.clear();
    expect(store.size()).toBe(0);

    const hits = await store.search('Content', 10);
    expect(hits).toHaveLength(0);
  });
});
describe('getStats', () => {
  // Three chunks across two documents: the stats are expected to report
  // both counts, a 384-wide embedding and a positive memory figure.
  it('should return store statistics', async () => {
    const seeded: RAGChunk[] = [
      {
        id: 'chunk-1',
        documentId: 'doc-1',
        content: 'Content 1',
        startOffset: 0,
        endOffset: 9,
        index: 0,
        metadata: {
          startOffset: 0,
          endOffset: 10,
          type: 'text'
        }
      },
      {
        id: 'chunk-2',
        documentId: 'doc-1',
        content: 'Content 2',
        startOffset: 10,
        endOffset: 19,
        index: 0,
        metadata: {
          startOffset: 0,
          endOffset: 10,
          type: 'text'
        }
      },
      {
        id: 'chunk-3',
        documentId: 'doc-2',
        content: 'Content 3',
        startOffset: 0,
        endOffset: 9,
        index: 0,
        metadata: {
          startOffset: 0,
          endOffset: 10,
          type: 'text'
        }
      }
    ];

    await store.addChunks(seeded);

    const summary = store.getStats();

    expect(summary.totalChunks).toBe(3);
    expect(summary.totalDocuments).toBe(2);
    expect(summary.embeddingDimension).toBe(384);
    expect(summary.memoryUsage).toBeGreaterThan(0);
  });
});
describe('persistence', () => {
  // Round-trip: save a one-chunk store, load it into a second store and
  // check that the chunk is present and searchable there.
  it('should save store to disk', async () => {
    const chunks: RAGChunk[] = [
      {
        id: 'chunk-1',
        documentId: 'doc-1',
        content: 'Test content',
        startOffset: 0,
        endOffset: 12,
        index: 0,
        metadata: {
          startOffset: 0,
          endOffset: 10,
          type: 'text'
        }
      }
    ];
    await store.addChunks(chunks);

    const savedPath = '.atlas/rag/test-vector-store.json';

    // Remove any file left behind by an earlier, aborted run. Otherwise
    // load() below could succeed on stale data even if this run's save()
    // wrote nothing. `force: true` makes this a no-op when the path is absent.
    await rm(savedPath, { force: true });

    try {
      await store.save(savedPath);

      // Create new store and load
      const newStore = new InMemoryVectorStore(embeddingModel);
      await newStore.load(savedPath);
      expect(newStore.size()).toBe(1);

      const results = await newStore.search('Test', 1);
      // Assert the length first so an empty result fails with a clear
      // message instead of a TypeError on `results[0]`.
      expect(results).toHaveLength(1);
      expect(results[0].chunk.id).toBe('chunk-1');
    } finally {
      // Always delete the artifact so the test does not pollute the working
      // tree, whether the assertions passed or not.
      await rm(savedPath, { force: true });
    }
  });

  // Loading from a path that does not exist must reject.
  it('should handle load errors gracefully', async () => {
    const newStore = new InMemoryVectorStore(embeddingModel);

    await expect(
      newStore.load('non-existent-file.json')
    ).rejects.toThrow();
  });
});
});