UNPKG

@boundless-oss/atlas

Version:

Atlas - MCP Server for comprehensive startup project management

499 lines (439 loc) 13.3 kB
import { rm } from 'node:fs/promises';

import { describe, it, expect, beforeEach, vi } from 'vitest';
import { InMemoryVectorStore } from '../vector-store.js';
import { LocalEmbeddingModel } from '../embeddings.js';
import type { RAGChunk, RAGSearchResult } from '../types.js';

/**
 * Builds the plain-text chunk fixture that most tests in this file share.
 *
 * Only the identifying fields and the top-level offsets vary between tests;
 * `index` and the nested `metadata` are the same constant values everywhere
 * this shape is used.
 *
 * @param id - Chunk identifier.
 * @param documentId - Identifier of the owning document.
 * @param content - Chunk text.
 * @param startOffset - Top-level start offset of the fixture.
 * @param endOffset - Top-level end offset of the fixture.
 * @returns A fresh chunk object (never shared between calls).
 */
function textChunk(
  id: string,
  documentId: string,
  content: string,
  startOffset: number,
  endOffset: number,
): RAGChunk {
  return {
    id,
    documentId,
    content,
    startOffset,
    endOffset,
    index: 0,
    metadata: { startOffset: 0, endOffset: 10, type: 'text' },
  };
}

/**
 * Builds the chunk used by the first two `addChunk` tests. It has no
 * top-level offsets; the offsets live only in `metadata`.
 */
function metadataOnlyChunk(): RAGChunk {
  return {
    id: 'chunk-1',
    documentId: 'doc-1',
    content: 'Test content',
    index: 0,
    metadata: { startOffset: 0, endOffset: 12, type: 'text' },
  };
}

/**
 * Three chunks spread over two documents: two belong to `doc-1` and one to
 * `doc-2`. Used by the `removeDocument` and `getStats` tests.
 */
function twoDocumentChunks(): RAGChunk[] {
  return [
    textChunk('chunk-1', 'doc-1', 'Content 1', 0, 9),
    textChunk('chunk-2', 'doc-1', 'Content 2', 10, 19),
    textChunk('chunk-3', 'doc-2', 'Content 3', 0, 9),
  ];
}

describe('InMemoryVectorStore', () => {
  let store: InMemoryVectorStore;
  let embeddingModel: LocalEmbeddingModel;

  // Every test gets a fresh model and store, so spies installed on
  // `embeddingModel` cannot leak from one test into the next.
  beforeEach(() => {
    embeddingModel = new LocalEmbeddingModel();
    store = new InMemoryVectorStore(embeddingModel);
  });

  describe('initialization', () => {
    it('should initialize with empty store', () => {
      expect(store.size()).toBe(0);
    });

    it('should initialize with provided embedding model', () => {
      expect(store).toBeDefined();
      expect(store.size()).toBe(0);
    });
  });

  describe('addChunk', () => {
    it('should add a single chunk with embedding', async () => {
      const chunk = metadataOnlyChunk();

      await store.addChunk(chunk);

      expect(store.size()).toBe(1);
    });

    it('should generate embedding if not provided', async () => {
      const chunk = metadataOnlyChunk();
      const spy = vi.spyOn(embeddingModel, 'embedSingle');

      await store.addChunk(chunk);

      expect(spy).toHaveBeenCalledWith('Test content');
      expect(store.size()).toBe(1);
    });

    it('should use provided embedding', async () => {
      const embedding = new Float32Array(384).fill(0.5);
      const chunk: RAGChunk = {
        ...textChunk('chunk-1', 'doc-1', 'Test content', 0, 12),
        embedding,
      };
      const spy = vi.spyOn(embeddingModel, 'embedSingle');

      await store.addChunk(chunk);

      // A chunk that already carries an embedding must not hit the model.
      expect(spy).not.toHaveBeenCalled();
      expect(store.size()).toBe(1);
    });

    it('should handle duplicate chunk IDs', async () => {
      // Both chunks deliberately share the id 'chunk-1'.
      const chunk1 = textChunk('chunk-1', 'doc-1', 'Test content 1', 0, 14);
      const chunk2 = textChunk('chunk-1', 'doc-1', 'Test content 2', 0, 14);

      await store.addChunk(chunk1);
      await store.addChunk(chunk2);

      expect(store.size()).toBe(1);
    });
  });

  describe('addChunks', () => {
    it('should add multiple chunks', async () => {
      const chunks: RAGChunk[] = [
        textChunk('chunk-1', 'doc-1', 'Test content 1', 0, 14),
        textChunk('chunk-2', 'doc-1', 'Test content 2', 15, 29),
      ];

      await store.addChunks(chunks);

      expect(store.size()).toBe(2);
    });

    it('should batch embed chunks for efficiency', async () => {
      const chunks: RAGChunk[] = Array.from({ length: 10 }, (_, i) =>
        textChunk(`chunk-${i}`, 'doc-1', `Test content ${i}`, i * 15, (i + 1) * 15),
      );
      const spy = vi.spyOn(embeddingModel, 'embed');

      await store.addChunks(chunks);

      // One call to `embed` carrying every chunk's text, in order.
      expect(spy).toHaveBeenCalledOnce();
      expect(spy).toHaveBeenCalledWith(chunks.map(c => c.content));
      expect(store.size()).toBe(10);
    });
  });

  describe('search', () => {
    beforeEach(async () => {
      const chunks: RAGChunk[] = [
        {
          id: 'chunk-1',
          documentId: 'doc-1',
          content: 'The quick brown fox jumps over the lazy dog',
          startOffset: 0,
          endOffset: 44,
          metadata: { topic: 'animals' },
        },
        {
          id: 'chunk-2',
          documentId: 'doc-1',
          content: 'Machine learning is a subset of artificial intelligence',
          startOffset: 45,
          endOffset: 100,
          metadata: { topic: 'technology' },
        },
        {
          id: 'chunk-3',
          documentId: 'doc-2',
          content: 'Deep learning uses neural networks with multiple layers',
          startOffset: 0,
          endOffset: 55,
          metadata: { topic: 'technology' },
        },
      ];

      await store.addChunks(chunks);
    });

    it('should find relevant chunks', async () => {
      const results = await store.search('artificial intelligence', 2);

      expect(results).toHaveLength(2);
      expect(results[0].chunk.id).toBe('chunk-2');
      expect(results[0].score).toBeGreaterThan(0);
      expect(results[0].score).toBeLessThanOrEqual(1);
    });

    it('should respect k parameter', async () => {
      const results = await store.search('technology', 1);

      expect(results).toHaveLength(1);
      expect(results[0].score).toBeGreaterThan(0);
    });

    it('should return empty array for no matches', async () => {
      // NOTE: the store is emptied first, so this exercises a search against
      // an empty store rather than a query that matches nothing.
      await store.clear();

      const results = await store.search('quantum physics', 5);

      expect(results).toHaveLength(0);
    });

    it('should rank results by similarity', async () => {
      const results = await store.search('machine learning AI', 3);

      expect(results).toHaveLength(3);
      // Scores must be non-increasing from first to last.
      expect(results[0].score).toBeGreaterThanOrEqual(results[1].score);
      expect(results[1].score).toBeGreaterThanOrEqual(results[2].score);
    });

    it('should include all chunk information in results', async () => {
      const results = await store.search('fox', 1);

      expect(results[0]).toHaveProperty('chunk');
      expect(results[0]).toHaveProperty('score');
      expect(results[0].chunk).toHaveProperty('id');
      expect(results[0].chunk).toHaveProperty('content');
      expect(results[0].chunk).toHaveProperty('metadata');
    });
  });

  describe('searchWithFilters', () => {
    beforeEach(async () => {
      const chunks: RAGChunk[] = [
        {
          id: 'chunk-1',
          documentId: 'doc-1',
          content: 'Python programming basics',
          startOffset: 0,
          endOffset: 25,
          metadata: { language: 'python', level: 'beginner' },
        },
        {
          id: 'chunk-2',
          documentId: 'doc-1',
          content: 'Advanced Python techniques',
          startOffset: 26,
          endOffset: 52,
          metadata: { language: 'python', level: 'advanced' },
        },
        {
          id: 'chunk-3',
          documentId: 'doc-2',
          content: 'JavaScript programming basics',
          startOffset: 0,
          endOffset: 29,
          metadata: { language: 'javascript', level: 'beginner' },
        },
      ];

      await store.addChunks(chunks);
    });

    it('should filter by metadata', async () => {
      const results = await store.searchWithFilters('programming', 5, {
        language: 'python',
      });

      expect(results).toHaveLength(2);
      expect(results.every(r => r.chunk.metadata.language === 'python')).toBe(true);
    });

    it('should filter by multiple metadata fields', async () => {
      const results = await store.searchWithFilters('programming', 5, {
        language: 'python',
        level: 'beginner',
      });

      expect(results).toHaveLength(1);
      expect(results[0].chunk.id).toBe('chunk-1');
    });

    it('should return empty array if no matches after filtering', async () => {
      const results = await store.searchWithFilters('programming', 5, {
        language: 'rust',
      });

      expect(results).toHaveLength(0);
    });
  });

  describe('removeDocument', () => {
    it('should remove all chunks from a document', async () => {
      await store.addChunks(twoDocumentChunks());
      expect(store.size()).toBe(3);

      await store.removeDocument('doc-1');

      // Only the single 'doc-2' chunk should remain and be searchable.
      expect(store.size()).toBe(1);
      const results = await store.search('Content', 10);
      expect(results).toHaveLength(1);
      expect(results[0].chunk.documentId).toBe('doc-2');
    });

    it('should handle removing non-existent document', async () => {
      await store.removeDocument('non-existent');

      expect(store.size()).toBe(0);
    });
  });

  describe('clear', () => {
    it('should remove all chunks', async () => {
      const chunks: RAGChunk[] = Array.from({ length: 5 }, (_, i) =>
        textChunk(`chunk-${i}`, 'doc-1', `Content ${i}`, i * 10, (i + 1) * 10),
      );
      await store.addChunks(chunks);
      expect(store.size()).toBe(5);

      await store.clear();

      expect(store.size()).toBe(0);
      const results = await store.search('Content', 10);
      expect(results).toHaveLength(0);
    });
  });

  describe('getStats', () => {
    it('should return store statistics', async () => {
      await store.addChunks(twoDocumentChunks());

      const stats = store.getStats();

      expect(stats.totalChunks).toBe(3);
      expect(stats.totalDocuments).toBe(2);
      expect(stats.embeddingDimension).toBe(384);
      expect(stats.memoryUsage).toBeGreaterThan(0);
    });
  });

  describe('persistence', () => {
    it('should save store to disk', async () => {
      const chunks: RAGChunk[] = [textChunk('chunk-1', 'doc-1', 'Test content', 0, 12)];
      await store.addChunks(chunks);

      const savedPath = '.atlas/rag/test-vector-store.json';
      try {
        await store.save(savedPath);

        // Create new store and load
        const newStore = new InMemoryVectorStore(embeddingModel);
        await newStore.load(savedPath);

        expect(newStore.size()).toBe(1);
        const results = await newStore.search('Test', 1);
        // Guard the index access so an empty result fails as an assertion
        // instead of a TypeError on `results[0].chunk`.
        expect(results).toHaveLength(1);
        expect(results[0].chunk.id).toBe('chunk-1');
      } finally {
        // Remove only the file this test wrote. The enclosing `.atlas`
        // directory is left untouched because it may hold real project data.
        // `force: true` makes this a no-op if `save` failed before writing.
        await rm(savedPath, { force: true });
      }
    });

    it('should handle load errors gracefully', async () => {
      const newStore = new InMemoryVectorStore(embeddingModel);

      await expect(newStore.load('non-existent-file.json')).rejects.toThrow();
    });
  });
});