@boundless-oss/atlas
Version:
Atlas - MCP Server for comprehensive startup project management
164 lines • 5.71 kB
JavaScript
import { promises as fs } from 'fs';
import path from 'path';
/**
 * Coordinates document indexing and querying on top of an injected document
 * processor and vector store.
 *
 * Files are read from disk, parsed and chunked by `documentProcessor`, and the
 * resulting chunks are handed to `vectorStore`. When `config.persistencePath`
 * is set, the vector store is saved to / loaded from `vector-store.json` inside
 * that directory.
 */
export class RAGPipeline {
  config;
  embeddingModel;
  vectorStore;
  documentProcessor;
  initialized = false;

  /**
   * @param {object} config - Settings read by this class: `persistencePath`,
   *   `chunkSize`, `chunkOverlap` and `collections` (a map of collection name
   *   to an object with a `paths` array).
   * @param {*} embeddingModel - Stored on the instance; not used directly by
   *   any method of this class.
   * @param {object} vectorStore - Must provide `addChunks`, `search`,
   *   `searchWithFilters`, `getStats`, `clear`, `save` and `load`.
   * @param {object} documentProcessor - Must provide `parse(content, path)`
   *   and `chunk(document, chunkSize, chunkOverlap)`.
   */
  constructor(config, embeddingModel, vectorStore, documentProcessor) {
    this.config = config;
    this.embeddingModel = embeddingModel;
    this.vectorStore = vectorStore;
    this.documentProcessor = documentProcessor;
  }

  /**
   * Prepares the pipeline for use. Subsequent calls are no-ops once the first
   * one has completed.
   *
   * With a persistence path configured, the directory is created if it is not
   * accessible and a previously saved index is loaded on a best-effort basis.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initialized) {
      return;
    }
    const { persistencePath } = this.config;
    if (persistencePath) {
      // Ensure the persistence directory exists.
      try {
        await fs.access(persistencePath);
      } catch {
        await fs.mkdir(persistencePath, { recursive: true });
      }
      // Loading is deliberately best-effort: on a first run there is no saved
      // index yet, and that must not prevent start-up.
      try {
        await this.loadIndex();
      } catch {
        // No usable existing index; continue with whatever the store holds.
      }
    }
    this.initialized = true;
  }

  /**
   * Reads one file as UTF-8, parses and chunks it, and adds the chunks to the
   * vector store.
   *
   * @param {string} documentPath - Path of the file to index.
   * @returns {Promise<object>} The parsed document, with its `chunks` property
   *   set to the generated chunks.
   */
  async indexDocument(documentPath) {
    const content = await fs.readFile(documentPath, 'utf-8');
    const document = await this.documentProcessor.parse(content, documentPath);
    const chunks = await this.documentProcessor.chunk(
      document,
      this.config.chunkSize,
      this.config.chunkOverlap,
    );
    document.chunks = chunks;
    await this.vectorStore.addChunks(chunks);
    return document;
  }

  /**
   * Indexes every `.md` file found recursively under a directory. A failure
   * on one file is recorded in the result and does not stop the run.
   *
   * @param {string} directoryPath - Root directory to scan.
   * @param {(done: number, total: number) => void} [progressCallback] -
   *   Invoked after each file (successful or not) with the number of files
   *   processed so far and the total number of files.
   * @returns {Promise<{indexed: number, failed: number, documents: object[], errors: string[]}>}
   */
  async indexDirectory(directoryPath, progressCallback) {
    const result = {
      indexed: 0,
      failed: 0,
      documents: [],
      errors: [],
    };
    const files = await this.getMarkdownFiles(directoryPath);
    const total = files.length;
    for (let i = 0; i < total; i++) {
      const file = files[i];
      try {
        const document = await this.indexDocument(file);
        result.documents.push(document);
        result.indexed++;
      } catch (error) {
        result.failed++;
        result.errors.push(`Failed to index ${file}: ${error}`);
      }
      if (progressCallback) {
        progressCallback(i + 1, total);
      }
    }
    // Persist once per directory run, only when persistence is configured.
    if (this.config.persistencePath) {
      await this.saveIndex();
    }
    return result;
  }

  /**
   * Indexes every path listed under a named collection in
   * `config.collections` and merges the per-directory results.
   *
   * @param {string} collectionName - Key into `config.collections`.
   * @returns {Promise<{indexed: number, failed: number, documents: object[], errors: string[]}>}
   * @throws {Error} 'Collection not found' when the name is not configured
   *   (including when no collections are configured at all).
   */
  async indexCollection(collectionName) {
    // Optional chaining so a config without `collections` yields the intended
    // error instead of a TypeError.
    const collection = this.config.collections?.[collectionName];
    if (!collection) {
      throw new Error('Collection not found');
    }
    const result = {
      indexed: 0,
      failed: 0,
      documents: [],
      errors: [],
    };
    for (const collectionPath of collection.paths) {
      const dirResult = await this.indexDirectory(collectionPath);
      result.indexed += dirResult.indexed;
      result.failed += dirResult.failed;
      result.documents.push(...dirResult.documents);
      result.errors.push(...dirResult.errors);
    }
    return result;
  }

  /**
   * Queries the vector store.
   *
   * @param {object} query
   * @param {*} query.query - Passed through to the vector store unchanged.
   * @param {number} [query.limit] - Maximum number of results; a missing or
   *   falsy value falls back to 10.
   * @param {object} [query.filters] - When present, `searchWithFilters` is
   *   used instead of `search`.
   * @param {number} [query.threshold] - When given, results whose `score` is
   *   below this value are dropped.
   * @returns {Promise<object[]>} Results as returned by the vector store,
   *   optionally filtered by score.
   */
  async search(query) {
    const limit = query.limit || 10;
    const results = query.filters
      ? await this.vectorStore.searchWithFilters(query.query, limit, query.filters)
      : await this.vectorStore.search(query.query, limit);

    const { threshold } = query;
    // A threshold of exactly 0 is a legitimate request (it excludes
    // negative-score results) even though 0 is falsy; every other value keeps
    // the plain truthiness rule.
    const hasThreshold = threshold === 0 || Boolean(threshold);
    if (!hasThreshold) {
      return results;
    }
    return results.filter((r) => r.score >= threshold);
  }

  /**
   * Summarises the current index using the vector store's own statistics.
   *
   * NOTE: `lastIndexed` is the time of this call, not of the last indexing
   * run, and `collections` is always empty; per-collection tracking is not
   * implemented.
   *
   * @returns {Promise<{totalDocuments: *, totalChunks: *, totalCollections: number, indexSize: *, lastIndexed: string, collections: object}>}
   */
  async getStats() {
    // Awaited so that both synchronous and promise-returning stores work.
    const stats = await this.vectorStore.getStats();
    const now = new Date().toISOString();
    return {
      totalDocuments: stats.totalDocuments,
      totalChunks: stats.totalChunks,
      // Tolerate a config that defines no collections.
      totalCollections: Object.keys(this.config.collections ?? {}).length,
      indexSize: stats.memoryUsage,
      lastIndexed: now,
      collections: {}, // Would need to track this separately for full implementation
    };
  }

  /**
   * Clears the vector store.
   *
   * @returns {Promise<void>}
   */
  async clearIndex() {
    await this.vectorStore.clear();
  }

  /**
   * Saves the vector store to `vector-store.json` in the persistence
   * directory.
   *
   * @returns {Promise<void>}
   * @throws {Error} When no persistence path is configured.
   */
  async saveIndex() {
    if (!this.config.persistencePath) {
      throw new Error('No persistence path configured');
    }
    const indexPath = path.join(this.config.persistencePath, 'vector-store.json');
    await this.vectorStore.save(indexPath);
  }

  /**
   * Loads the vector store from `vector-store.json` in the persistence
   * directory.
   *
   * @returns {Promise<void>}
   * @throws {Error} When no persistence path is configured.
   */
  async loadIndex() {
    if (!this.config.persistencePath) {
      throw new Error('No persistence path configured');
    }
    const indexPath = path.join(this.config.persistencePath, 'vector-store.json');
    await this.vectorStore.load(indexPath);
  }

  /**
   * Recursively collects the paths of files whose names end in `.md`.
   *
   * Unreadable directories and entries that cannot be stat'ed (for example a
   * dangling symlink) are skipped individually, so one bad entry does not
   * hide the rest of the directory.
   *
   * @param {string} dir - Directory to scan.
   * @returns {Promise<string[]>} Paths built by joining `dir` with entry names.
   */
  async getMarkdownFiles(dir) {
    let entries;
    try {
      entries = await fs.readdir(dir);
    } catch {
      // Ignore directories we can't read.
      return [];
    }
    const files = [];
    for (const entry of entries) {
      const fullPath = path.join(dir, entry);
      let stat;
      try {
        stat = await fs.stat(fullPath);
      } catch {
        // Skip only this entry; keep scanning its siblings.
        continue;
      }
      if (stat.isDirectory()) {
        const subFiles = await this.getMarkdownFiles(fullPath);
        for (const subFile of subFiles) {
          files.push(subFile);
        }
      } else if (stat.isFile() && entry.endsWith('.md')) {
        files.push(fullPath);
      }
    }
    return files;
  }
}
//# sourceMappingURL=rag-pipeline.js.map