@boundless-oss/atlas
Version:
Atlas - MCP Server for comprehensive startup project management
218 lines (179 loc) • 5.53 kB
text/typescript
import { promises as fs } from 'fs';
import path from 'path';
import type {
RAGConfig,
RAGDocument,
RAGSearchQuery,
RAGSearchResult,
RAGStats,
EmbeddingModel,
DocumentProcessor,
RAGVectorStore
} from './types.js';
/**
 * Summary of a batch indexing run (a directory or a whole collection).
 */
export interface IndexingResult {
  /** Number of documents that were indexed successfully. */
  indexed: number;

  /** Number of documents whose indexing threw an error. */
  failed: number;

  /** The successfully indexed documents, in processing order. */
  documents: RAGDocument[];

  /** One human-readable message per failed document. */
  errors: string[];
}
/**
 * Ties together document processing, the vector store and optional on-disk
 * persistence into a single indexing / search pipeline.
 *
 * The embedding model is accepted and stored, but none of the methods in this
 * class call it directly.
 */
export class RAGPipeline {
  private initialized = false;

  constructor(
    private readonly config: RAGConfig,
    private readonly embeddingModel: EmbeddingModel,
    private readonly vectorStore: RAGVectorStore,
    private readonly documentProcessor: DocumentProcessor
  ) {}

  /**
   * Prepares the pipeline for use. Safe to call more than once; calls after
   * the first successful one return immediately.
   *
   * When a persistence path is configured, the directory is created if
   * needed and a previously saved index is loaded on a best-effort basis.
   *
   * @throws If the persistence directory cannot be created.
   */
  async initialize(): Promise<void> {
    if (this.initialized) return;

    const { persistencePath } = this.config;
    if (persistencePath) {
      // `recursive: true` makes mkdir succeed when the directory already
      // exists, so no separate existence probe (and no check-then-create
      // race) is needed.
      await fs.mkdir(persistencePath, { recursive: true });

      try {
        await this.loadIndex();
      } catch {
        // Best effort: a missing (or unreadable) saved index is not fatal;
        // the pipeline simply starts with whatever the store already holds.
      }
    }

    this.initialized = true;
  }

  /**
   * Reads one file, parses and chunks it, and adds the chunks to the vector
   * store.
   *
   * @param documentPath Path of the file to index (read as UTF-8).
   * @returns The parsed document with its `chunks` property populated.
   * @throws Whatever the file read, the document processor or the vector
   *         store throws.
   */
  async indexDocument(documentPath: string): Promise<RAGDocument> {
    const content = await fs.readFile(documentPath, 'utf-8');
    const document = await this.documentProcessor.parse(content, documentPath);

    const chunks = await this.documentProcessor.chunk(
      document,
      this.config.chunkSize,
      this.config.chunkOverlap
    );
    document.chunks = chunks;

    await this.vectorStore.addChunks(chunks);
    return document;
  }

  /**
   * Indexes every `.md` file found (recursively) under a directory.
   *
   * Failures on individual files are recorded in the result and do not stop
   * the run. When a persistence path is configured, the index is saved once
   * all files have been processed.
   *
   * @param directoryPath Root directory to scan.
   * @param progressCallback Invoked after each file with the 1-based count of
   *        processed files and the total number of files.
   * @returns Counts, indexed documents and error messages for the run.
   */
  async indexDirectory(
    directoryPath: string,
    progressCallback?: (current: number, total: number) => void
  ): Promise<IndexingResult> {
    const result: IndexingResult = {
      indexed: 0,
      failed: 0,
      documents: [],
      errors: []
    };

    const files = await this.getMarkdownFiles(directoryPath);
    const total = files.length;

    for (const [index, file] of files.entries()) {
      try {
        const document = await this.indexDocument(file);
        result.documents.push(document);
        result.indexed++;
      } catch (error) {
        result.failed++;
        result.errors.push(`Failed to index ${file}: ${String(error)}`);
      }

      if (progressCallback) {
        progressCallback(index + 1, total);
      }
    }

    if (this.config.persistencePath) {
      await this.saveIndex();
    }

    return result;
  }

  /**
   * Indexes every path of a configured collection and merges the
   * per-directory results.
   *
   * @param collectionName Key of the collection in `config.collections`.
   * @throws Error('Collection not found') when the name is not a configured
   *         collection.
   */
  async indexCollection(collectionName: string): Promise<IndexingResult> {
    const { collections } = this.config;
    // Own-property check so inherited keys such as "constructor" or
    // "toString" are not mistaken for configured collections.
    const collection = Object.prototype.hasOwnProperty.call(collections, collectionName)
      ? collections[collectionName]
      : undefined;
    if (!collection) {
      throw new Error('Collection not found');
    }

    const result: IndexingResult = {
      indexed: 0,
      failed: 0,
      documents: [],
      errors: []
    };

    for (const collectionPath of collection.paths) {
      const dirResult = await this.indexDirectory(collectionPath);
      result.indexed += dirResult.indexed;
      result.failed += dirResult.failed;
      result.documents.push(...dirResult.documents);
      result.errors.push(...dirResult.errors);
    }

    return result;
  }

  /**
   * Runs a similarity search against the vector store.
   *
   * @param query Search text plus optional `limit` (falls back to 10 when
   *        missing or falsy), `filters` and minimum-score `threshold`.
   * @returns Matching results; when a threshold is given, only results with
   *          `score >= threshold` are kept.
   */
  async search(query: RAGSearchQuery): Promise<RAGSearchResult[]> {
    const limit = query.limit || 10;

    const results: RAGSearchResult[] = query.filters
      ? await this.vectorStore.searchWithFilters(query.query, limit, query.filters)
      : await this.vectorStore.search(query.query, limit);

    // Capture the threshold in a local so the check below still holds inside
    // the filter callback. A threshold of 0 is a legitimate cut-off (it drops
    // negative scores), so test for "is a usable number" rather than
    // truthiness. NaN is treated as "no threshold".
    const { threshold } = query;
    if (typeof threshold !== 'number' || Number.isNaN(threshold)) {
      return results;
    }

    return results.filter(result => result.score >= threshold);
  }

  /**
   * Reports index statistics taken from the vector store and the
   * configuration.
   *
   * NOTE(review): `lastIndexed` is the time of this call, not the time of the
   * last indexing run, and `collections` is always empty; per-collection
   * tracking is not implemented here.
   */
  async getStats(): Promise<RAGStats> {
    const stats = this.vectorStore.getStats();
    const now = new Date().toISOString();

    return {
      totalDocuments: stats.totalDocuments,
      totalChunks: stats.totalChunks,
      totalCollections: Object.keys(this.config.collections).length,
      indexSize: stats.memoryUsage,
      lastIndexed: now,
      collections: {} // Would need to track this separately for full implementation
    };
  }

  /** Removes everything from the vector store. */
  async clearIndex(): Promise<void> {
    await this.vectorStore.clear();
  }

  /**
   * Persists the vector store to `vector-store.json` inside the persistence
   * directory.
   *
   * @throws Error('No persistence path configured') when persistence is off.
   */
  async saveIndex(): Promise<void> {
    await this.vectorStore.save(this.resolveIndexPath());
  }

  /**
   * Loads the vector store from `vector-store.json` inside the persistence
   * directory.
   *
   * @throws Error('No persistence path configured') when persistence is off,
   *         or whatever the vector store throws while loading.
   */
  async loadIndex(): Promise<void> {
    await this.vectorStore.load(this.resolveIndexPath());
  }

  /** Returns the on-disk index file path, or throws if persistence is off. */
  private resolveIndexPath(): string {
    const { persistencePath } = this.config;
    if (!persistencePath) {
      throw new Error('No persistence path configured');
    }
    return path.join(persistencePath, 'vector-store.json');
  }

  /**
   * Recursively collects the paths of all regular files ending in `.md`
   * below `dir`.
   *
   * Unreadable directories yield no files, and an entry that cannot be
   * stat-ed (for example a dangling symlink) is skipped on its own without
   * affecting its siblings.
   */
  private async getMarkdownFiles(dir: string): Promise<string[]> {
    let entries: string[];
    try {
      entries = await fs.readdir(dir);
    } catch {
      // Ignore directories we can't read
      return [];
    }

    const files: string[] = [];
    for (const entry of entries) {
      const fullPath = path.join(dir, entry);

      // Handle stat failures per entry so one bad entry does not drop the
      // rest of the directory.
      const stat = await fs.stat(fullPath).catch(() => undefined);
      if (!stat) continue;

      if (stat.isDirectory()) {
        const subFiles = await this.getMarkdownFiles(fullPath);
        files.push(...subFiles);
      } else if (stat.isFile() && entry.endsWith('.md')) {
        files.push(fullPath);
      }
    }

    return files;
  }
}