mongodocs-mcp
Lightning-fast semantic search for MongoDB documentation via Model Context Protocol. 10,000+ documents, <500ms search.
TypeScript
/**
* Voyage AI embedding pipeline.
* Uses the voyage-context-3 model to produce contextualized 2048-dimensional embeddings.
*
* Design notes:
* - RAGFlow-inspired dynamic batching with memory management
* - LightRAG-inspired parallel processing with MAX_ASYNC control
* - Chunks are grouped by document so each chunk is embedded with document-level context
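*
* Illustrative end-to-end usage (a sketch, not taken from the source; it assumes
* the constructor reads API credentials and the MongoDB connection from the
* environment, and that chunkedDocs comes from the project's chunker):
* @example
* const pipeline = new EmbeddingPipeline();
* await pipeline.embedAllDocuments(chunkedDocs);   // chunkedDocs: ChunkedDocument[]
* const queryVector = await pipeline.embedQuery('how do I create a compound index?');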
*/
import { ChunkedDocument } from '../types/index.js';
export declare class EmbeddingPipeline {
private voyageApiKey;
private voyageContextualUrl;
private mongodb;
private chunker;
private rateLimiter;
private readonly MAX_TOTAL_TOKENS;
private readonly MAX_TOTAL_CHUNKS;
private dynamicBatchSize;
private readonly MIN_BATCH_SIZE;
private readonly VOYAGE_DIMENSIONS;
constructor();
/**
* Embed all documents using voyage-context-3 contextualized embeddings.
* Each chunk is embedded together with the rest of its document, so the vectors carry document context.
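*
* Input sketch (ChunkedDocument is declared in ../types/index.js; the field
* names below are illustrative assumptions, not the actual type definition):
* @example
* const chunkedDocs = [{
*   url: 'https://www.mongodb.com/docs/manual/indexes/',     // assumed field
*   title: 'Indexes',                                         // assumed field
*   chunks: [{ text: 'Indexes support efficient execution of queries...' }], // assumed field
* }];
* await pipeline.embedAllDocuments(chunkedDocs as ChunkedDocument[]);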
*/
embedAllDocuments(chunkedDocs: ChunkedDocument[]): Promise<void>;
/**
* Process a batch of documents with contextualized embeddings.
* Each document's chunks are embedded together so every chunk is aware of its document-level context.
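*
* Conceptual sketch of the grouping step (chunk field names assumed, as above):
* @example
* // One inner array per document, one string per chunk; this nesting is what lets
* // the contextualized model see sibling chunks while embedding each one.
* const inputs: string[][] = batch.map(doc => doc.chunks.map(chunk => chunk.text));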
*/
private processBatch;
/**
* Call the Voyage contextualized-embeddings endpoint.
* Chunks are sent grouped by document, so each chunk is embedded with full document context.
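*
* Request sketch (field names follow the public Voyage AI API reference as we
* understand it; the env var name is an assumption - verify against current docs):
* @example
* const res = await fetch('https://api.voyageai.com/v1/contextualizedembeddings', {
*   method: 'POST',
*   headers: { 'Authorization': `Bearer ${process.env.VOYAGE_API_KEY}`, 'Content-Type': 'application/json' },
*   body: JSON.stringify({
*     model: 'voyage-context-3',
*     inputs,                      // string[][] - one inner array per document
*     input_type: 'document',
*     output_dimension: 2048,      // matches VOYAGE_DIMENSIONS
*   }),
* });
* const json = await res.json();   // json.data[d].data[c].embedding -> number[] for chunk c of document d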
*/
private callContextualizedEmbed;
/**
* Prepare vector documents for MongoDB insertion
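*
* Illustrative shape of one stored vector document (field names are assumptions,
* not taken from the source):
* @example
* const vectorDoc = {
*   text: chunk.text,             // chunk content returned to the search client
*   embedding,                    // number[] of length 2048
*   url: doc.url,                 // provenance for citations (assumed)
*   chunkIndex,                   // position of the chunk within its document (assumed)
* };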
*/
private prepareVectorDocuments;
/**
* Insert vector documents into MongoDB Atlas
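*
* Vectors are only searchable once the collection has an Atlas Vector Search
* index; a minimal definition (the index name and "embedding" path are assumptions,
* and createSearchIndex requires a recent Node.js driver):
* @example
* await collection.createSearchIndex({
*   name: 'vector_index',
*   type: 'vectorSearch',
*   definition: {
*     fields: [{ type: 'vector', path: 'embedding', numDimensions: 2048, similarity: 'cosine' }],
*   },
* });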
*/
private insertToMongoDB;
/**
* Update existing documents with new embeddings
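*
* One possible upsert pattern (the match key below is a guess; the real
* implementation may key on a different field):
* @example
* await collection.bulkWrite(vectorDocs.map(d => ({
*   updateOne: {
*     filter: { url: d.url, chunkIndex: d.chunkIndex },   // assumed unique key
*     update: { $set: d },
*     upsert: true,
*   },
* })));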
*/
private updateExistingDocuments;
/**
* Create batches of documents respecting Voyage API limits
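*
* Greedy batching sketch (the limits are the MAX_TOTAL_TOKENS / MAX_TOTAL_CHUNKS
* constants declared above; estimateTokens is a hypothetical helper and the
* chunks field is assumed, as above):
* @example
* const batches: ChunkedDocument[][] = [];
* let current: ChunkedDocument[] = [], tokens = 0, chunks = 0;
* for (const doc of docs) {
*   const t = estimateTokens(doc), c = doc.chunks.length;
*   if (current.length && (tokens + t > MAX_TOTAL_TOKENS || chunks + c > MAX_TOTAL_CHUNKS)) {
*     batches.push(current); current = []; tokens = 0; chunks = 0;
*   }
*   current.push(doc); tokens += t; chunks += c;
* }
* if (current.length) batches.push(current);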
*/
private createBatches;
/**
* Embed a single search query using the same contextualized endpoint
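*
* Usage sketch, feeding the vector into an Atlas $vectorSearch stage (the index
* and path names are assumptions matching the index example above):
* @example
* const queryVector = await pipeline.embedQuery('how do TTL indexes work?');
* const hits = await collection.aggregate([
*   { $vectorSearch: { index: 'vector_index', path: 'embedding', queryVector, numCandidates: 200, limit: 5 } },
*   { $project: { text: 1, url: 1, score: { $meta: 'vectorSearchScore' } } },
* ]).toArray();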
*/
embedQuery(query: string): Promise<number[]>;
private sleep;
}
//# sourceMappingURL=embedding-pipeline.d.ts.map