UNPKG

@boundless-oss/atlas

Version:

Atlas - MCP Server for comprehensive startup project management

218 lines (179 loc) 5.53 kB
import { promises as fs } from 'fs';
import path from 'path';
import type {
  RAGConfig,
  RAGDocument,
  RAGSearchQuery,
  RAGSearchResult,
  RAGStats,
  EmbeddingModel,
  DocumentProcessor,
  RAGVectorStore
} from './types.js';

/** Summary returned by the bulk indexing methods of {@link RAGPipeline}. */
export interface IndexingResult {
  /** Number of files that were indexed successfully. */
  indexed: number;
  /** Number of files whose indexing threw. */
  failed: number;
  /** Documents produced for the successfully indexed files. */
  documents: RAGDocument[];
  /** One human-readable message per failed file. */
  errors: string[];
}

/**
 * Coordinates a document processor and a vector store: reads Markdown files,
 * parses and chunks them, pushes the chunks into the store, and forwards
 * search queries to the store.
 */
export class RAGPipeline {
  private initialized = false;

  constructor(
    private readonly config: RAGConfig,
    private readonly embeddingModel: EmbeddingModel,
    private readonly vectorStore: RAGVectorStore,
    private readonly documentProcessor: DocumentProcessor
  ) {}

  /**
   * Prepares the persistence directory (when `config.persistencePath` is set)
   * and makes a best-effort attempt to load a previously saved index.
   * Calling it again after a successful run is a no-op.
   */
  async initialize(): Promise<void> {
    if (this.initialized) return;

    if (this.config.persistencePath) {
      // Ensure the persistence directory exists.
      try {
        await fs.access(this.config.persistencePath);
      } catch {
        await fs.mkdir(this.config.persistencePath, { recursive: true });
      }

      // Try to load an existing index; any load failure is deliberately
      // ignored so that a missing index does not block start-up.
      try {
        await this.loadIndex();
      } catch {
        // No existing index, that's fine
      }
    }

    this.initialized = true;
  }

  /**
   * Reads one file as UTF-8, parses it, splits it into chunks using the
   * configured `chunkSize` / `chunkOverlap`, and adds the chunks to the
   * vector store.
   *
   * @param documentPath - Path of the file to index.
   * @returns The parsed document with its `chunks` property populated.
   * @throws Whatever the file read, the document processor or the vector
   *   store throws; nothing is caught here.
   */
  async indexDocument(documentPath: string): Promise<RAGDocument> {
    const content = await fs.readFile(documentPath, 'utf-8');
    const document = await this.documentProcessor.parse(content, documentPath);

    const chunks = await this.documentProcessor.chunk(
      document,
      this.config.chunkSize,
      this.config.chunkOverlap
    );

    document.chunks = chunks;
    await this.vectorStore.addChunks(chunks);

    return document;
  }

  /**
   * Indexes every `.md` file found under `directoryPath` (recursively), one
   * file at a time. A failure on one file is recorded in the result and does
   * not stop the remaining files. When a persistence path is configured the
   * index is saved once all files have been processed.
   *
   * @param directoryPath - Root directory to scan.
   * @param progressCallback - Invoked after each file (success or failure)
   *   with the 1-based count of processed files and the total file count.
   * @returns Counts, documents and error messages for the run.
   */
  async indexDirectory(
    directoryPath: string,
    progressCallback?: (current: number, total: number) => void
  ): Promise<IndexingResult> {
    const result: IndexingResult = {
      indexed: 0,
      failed: 0,
      documents: [],
      errors: []
    };

    const files = await this.getMarkdownFiles(directoryPath);
    const total = files.length;

    for (const [index, file] of files.entries()) {
      try {
        const document = await this.indexDocument(file);
        result.documents.push(document);
        result.indexed++;
      } catch (error) {
        result.failed++;
        result.errors.push(`Failed to index ${file}: ${error}`);
      }

      if (progressCallback) {
        progressCallback(index + 1, total);
      }
    }

    // Save index after indexing
    if (this.config.persistencePath) {
      await this.saveIndex();
    }

    return result;
  }

  /**
   * Indexes every path listed for a configured collection by delegating to
   * {@link RAGPipeline.indexDirectory} and merging the per-directory results.
   *
   * @param collectionName - Key into `config.collections`.
   * @throws Error when no collection with that name is configured.
   */
  async indexCollection(collectionName: string): Promise<IndexingResult> {
    const collection = this.config.collections[collectionName];
    if (!collection) {
      throw new Error('Collection not found');
    }

    const result: IndexingResult = {
      indexed: 0,
      failed: 0,
      documents: [],
      errors: []
    };

    for (const collectionPath of collection.paths) {
      const dirResult = await this.indexDirectory(collectionPath);
      result.indexed += dirResult.indexed;
      result.failed += dirResult.failed;
      result.documents.push(...dirResult.documents);
      result.errors.push(...dirResult.errors);
    }

    return result;
  }

  /**
   * Runs a query against the vector store, using the filtered search when
   * `query.filters` is set, then drops results scoring below
   * `query.threshold` when a threshold is given.
   *
   * @param query - Search text plus optional `limit`, `filters`, `threshold`.
   * @returns Matching results as returned by the store, threshold-filtered.
   */
  async search(query: RAGSearchQuery): Promise<RAGSearchResult[]> {
    // A missing (or zero) limit falls back to 10 results.
    const limit = query.limit || 10;

    const results = query.filters
      ? await this.vectorStore.searchWithFilters(query.query, limit, query.filters)
      : await this.vectorStore.search(query.query, limit);

    // Capture the threshold in a const so the narrowing survives inside the
    // filter callback. Only an absent (or NaN) threshold disables filtering:
    // an explicit 0 is a real cut-off that removes negative scores.
    const threshold = query.threshold;
    if (threshold == null || Number.isNaN(threshold)) {
      return results;
    }

    return results.filter((r) => r.score >= threshold);
  }

  /**
   * Builds a statistics snapshot from the vector store's own stats and the
   * number of configured collections.
   */
  async getStats(): Promise<RAGStats> {
    const stats = this.vectorStore.getStats();

    return {
      totalDocuments: stats.totalDocuments,
      totalChunks: stats.totalChunks,
      totalCollections: Object.keys(this.config.collections).length,
      indexSize: stats.memoryUsage,
      // NOTE(review): this is the time of the call, not of the last indexing
      // run; the pipeline does not currently record when it last indexed.
      lastIndexed: new Date().toISOString(),
      collections: {} // Would need to track this separately for full implementation
    };
  }

  /** Removes everything from the vector store. */
  async clearIndex(): Promise<void> {
    await this.vectorStore.clear();
  }

  /**
   * Saves the vector store to `vector-store.json` inside the persistence
   * directory.
   *
   * @throws Error when no persistence path is configured.
   */
  async saveIndex(): Promise<void> {
    if (!this.config.persistencePath) {
      throw new Error('No persistence path configured');
    }

    const indexPath = path.join(this.config.persistencePath, 'vector-store.json');
    await this.vectorStore.save(indexPath);
  }

  /**
   * Loads the vector store from `vector-store.json` inside the persistence
   * directory.
   *
   * @throws Error when no persistence path is configured.
   */
  async loadIndex(): Promise<void> {
    if (!this.config.persistencePath) {
      throw new Error('No persistence path configured');
    }

    const indexPath = path.join(this.config.persistencePath, 'vector-store.json');
    await this.vectorStore.load(indexPath);
  }

  /**
   * Recursively collects the paths of all regular files ending in `.md`
   * under `dir`. Unreadable directories yield no files, and an entry that
   * cannot be stat-ed (for example a dangling symlink) is skipped on its own
   * so that its siblings are still collected.
   */
  private async getMarkdownFiles(dir: string): Promise<string[]> {
    let entries: string[];
    try {
      entries = await fs.readdir(dir);
    } catch {
      // Ignore directories we can't read
      return [];
    }

    const files: string[] = [];

    for (const entry of entries) {
      const fullPath = path.join(dir, entry);

      try {
        // fs.stat follows symlinks, so linked files/directories are included.
        const stat = await fs.stat(fullPath);

        if (stat.isDirectory()) {
          files.push(...(await this.getMarkdownFiles(fullPath)));
        } else if (stat.isFile() && entry.endsWith('.md')) {
          files.push(fullPath);
        }
      } catch {
        // Skip only the entry that failed; keep scanning the rest.
      }
    }

    return files;
  }
}