mongodocs-mcp
Lightning-fast semantic search for MongoDB documentation via Model Context Protocol. 10,000+ documents, <500ms search.
324 lines • 14.3 kB
JavaScript
/**
* ENHANCED Voyage AI Embedding Pipeline
* Uses voyage-context-3 model for TRUE contextualized 2048-dimensional embeddings
*
* RESEARCH-BASED IMPROVEMENTS:
* - RAGFlow-inspired dynamic batching with memory management
* - LightRAG-inspired parallel processing with MAX_ASYNC control
* - True contextual document grouping for maximum embedding quality
*/
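//
// Illustrative usage sketch (not executed here): the call sequence below is inferred from
// this module's public methods; the chunked-document input is assumed to come from
// SmartChunker elsewhere in this package, and both VOYAGE_API_KEY and the MongoDB Atlas
// connection behind MongoDBClient must already be configured.
//
//   import { EmbeddingPipeline } from './embedding-pipeline.js';
//
//   const pipeline = new EmbeddingPipeline();
//   await pipeline.embedAllDocuments(chunkedDocs);   // docs chunked upstream
//   const queryVector = await pipeline.embedQuery('How do I create a compound index?');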
import axios from 'axios';
import { MongoDBClient } from './mongodb-client.js';
import { SmartChunker } from './smart-chunker.js';
import pLimit from 'p-limit';
export class EmbeddingPipeline {
voyageApiKey;
voyageContextualUrl = 'https://api.voyageai.com/v1/contextualizedembeddings';
mongodb;
chunker;
rateLimiter = pLimit(3); // Max 3 concurrent API calls
// ENHANCED: Research-based API limits and batching
MAX_TOTAL_TOKENS = 120000;
MAX_TOTAL_CHUNKS = 16000;
dynamicBatchSize = 8; // RAGFlow-inspired: Start with 8, adjust based on performance
MIN_BATCH_SIZE = 2; // Minimum for stability
VOYAGE_DIMENSIONS = 2048; // Maximum output dimension supported by voyage-context-3
constructor() {
const apiKey = process.env.VOYAGE_API_KEY;
if (!apiKey) {
throw new Error('VOYAGE_API_KEY environment variable is required');
}
this.voyageApiKey = apiKey;
this.mongodb = MongoDBClient.getInstance();
this.chunker = new SmartChunker();
}
/**
* Embed all documents using Voyage Context-3's contextualized embeddings
* This is the KEY differentiator - chunks are embedded with document context
*/
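// Input shape sketch, inferred from the fields read below (example values are hypothetical):
//   chunkedDocs = [{
//     documentId: 'manual/indexes/compound-indexes',
//     totalChunks: 2,
//     documentMetadata: { title: 'Compound Indexes' },
//     chunks: [
//       { content: '...chunk text...', metadata: { tokenCount: 512 } },
//       { content: '...chunk text...', metadata: { tokenCount: 480 } },
//     ],
//   }];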
async embedAllDocuments(chunkedDocs) {
console.error(`Starting Voyage Context-3 embedding pipeline...`);
console.error(`Processing ${chunkedDocs.length} documents with ${chunkedDocs.reduce((sum, d) => sum + d.chunks.length, 0)} total chunks`);
// Create batches respecting API limits
const batches = this.createBatches(chunkedDocs);
console.error(`Created ${batches.length} batches for processing`);
let processedChunks = 0;
let totalTokensUsed = 0;
for (let i = 0; i < batches.length; i++) {
const batch = batches[i];
console.error(`\nProcessing batch ${i + 1}/${batches.length} (${batch.documents.length} documents, ${batch.totalChunks} chunks)`);
try {
const { embeddings, tokensUsed } = await this.processBatch(batch);
processedChunks += embeddings.length;
totalTokensUsed += tokensUsed;
console.error(`Batch ${i + 1} complete: ${embeddings.length} chunks embedded, ${tokensUsed} tokens used`);
// Rate limiting
if (i < batches.length - 1) {
await this.sleep(1000); // 1 second between batches
}
}
catch (error) {
console.error(`Batch ${i + 1} failed:`, error);
throw error;
}
}
console.error(`\nEmbedding complete! Processed ${processedChunks} chunks using ${totalTokensUsed} tokens`);
}
/**
* Process a batch of documents with TRUE contextualized embeddings
* Each document's chunks are embedded together for global context awareness
*/
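// Grouping sketch (values are hypothetical): one sub-array of chunk texts per document, so
// every chunk is embedded alongside the rest of its own document:
//   documentGroups = [
//     ['doc A, chunk 1', 'doc A, chunk 2'],
//     ['doc B, chunk 1'],
//   ];
// metadataGroups mirrors this nesting so each embedding can be re-joined with its metadata.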
async processBatch(batch) {
// CRITICAL: Group chunks by document for contextualized_embed
const documentGroups = [];
const metadataGroups = [];
for (const doc of batch.documents) {
const docChunks = [];
const docMetadata = [];
doc.chunks.forEach((chunk, idx) => {
docChunks.push(chunk.content);
docMetadata.push({
documentId: doc.documentId,
chunkIndex: idx,
totalChunks: doc.totalChunks,
...doc.documentMetadata,
...chunk.metadata,
});
});
documentGroups.push(docChunks);
metadataGroups.push(docMetadata);
}
// Call Voyage API with CONTEXTUALIZED embeddings
const embeddings = await this.callContextualizedEmbed(documentGroups);
// Flatten embeddings and metadata for MongoDB
const flatEmbeddings = [];
const flatMetadata = [];
embeddings.forEach((docEmbeddings, docIdx) => {
docEmbeddings.forEach((embedding, chunkIdx) => {
flatEmbeddings.push(embedding);
flatMetadata.push(metadataGroups[docIdx][chunkIdx]);
});
});
// Prepare documents for MongoDB
const vectorDocuments = this.prepareVectorDocuments(flatEmbeddings, batch, flatMetadata);
// Insert into MongoDB
await this.insertToMongoDB(vectorDocuments);
// Estimate token usage (rough heuristic: ~4 characters per token)
const totalTokens = documentGroups.reduce((sum, doc) => sum + doc.reduce((docSum, chunk) => docSum + chunk.length / 4, 0), 0);
return {
embeddings: vectorDocuments,
tokensUsed: Math.round(totalTokens),
};
}
/**
* Call Voyage API with TRUE contextualized embeddings - USING THE CORRECT ENDPOINT!
* This is the GAME CHANGER - chunks are embedded with full document context!
*/
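// Nesting sketch of what this method sends and returns, limited to the shapes actually
// built and parsed below (no claims beyond them):
//   request body   -> { inputs: [[chunkText, chunkText], [chunkText]], input_type: 'document', model, output_dimension }
//   response.data  -> { data: [ { data: [ { embedding }, { embedding } ] }, { data: [ { embedding } ] } ] }
//   returned value -> [ [vector, vector], [vector] ]   // normalized, same nesting as `inputs`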
async callContextualizedEmbed(documentGroups) {
const model = 'voyage-context-3';
console.error(`Using ${model} with RESEARCH-ENHANCED contextualized embeddings!`);
let retryCount = 0;
const maxRetries = 3;
while (retryCount < maxRetries) {
try {
// Call the CORRECT contextualized embeddings endpoint!
const response = await this.rateLimiter(() => axios.post(this.voyageContextualUrl, {
inputs: documentGroups, // Array of arrays - each sub-array is a document's chunks
input_type: 'document',
model: model,
output_dimension: this.VOYAGE_DIMENSIONS
}, {
headers: {
'Authorization': `Bearer ${this.voyageApiKey}`,
'Content-Type': 'application/json',
},
timeout: 60000, // Longer timeout for contextualized embeddings
}));
if (!response.data?.data) {
throw new Error('No data returned from Voyage contextualized API');
}
// Extract embeddings - the response structure is:
// data: [ { data: [ { embedding: [...] }, { embedding: [...] } ] }, ... ]
const allEmbeddings = [];
for (const docResult of response.data.data) {
const docEmbeddings = [];
for (const chunk of docResult.data) {
if (chunk?.embedding) {
// Normalize the embedding for cosine similarity
const embedding = chunk.embedding;
const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
const normalized = embedding.map((v) => v / magnitude);
docEmbeddings.push(normalized);
}
}
allEmbeddings.push(docEmbeddings);
}
return allEmbeddings; // Success: the return exits the retry loop
}
catch (error) {
retryCount++;
if (retryCount >= maxRetries) {
console.error(`Voyage AI failed after ${maxRetries} retries:`, error.response?.data || error.message);
// RAGFlow-inspired: Try reducing batch size for next time
if (this.dynamicBatchSize > this.MIN_BATCH_SIZE) {
this.dynamicBatchSize = Math.max(this.dynamicBatchSize / 2, this.MIN_BATCH_SIZE);
console.error(`Reduced batch size to ${this.dynamicBatchSize} for stability`);
}
throw new Error(`Voyage API error: ${error.response?.data?.error || error.message || 'Unknown error'}`);
}
else {
console.error(`Voyage AI retry ${retryCount}/${maxRetries}:`, error.response?.data?.message || error.message);
await new Promise(resolve => setTimeout(resolve, 1000 * retryCount)); // Linear backoff: wait longer after each failed attempt
}
}
}
// This should never be reached due to the retry loop, but TypeScript requires it
throw new Error('Unexpected end of callContextualizedEmbed method');
}
/**
* Prepare vector documents for MongoDB insertion
*/
prepareVectorDocuments(embeddings, batch, metadataMap) {
const documents = [];
let embeddingIndex = 0;
for (const doc of batch.documents) {
for (const chunk of doc.chunks) {
const vectorDoc = {
content: chunk.content,
contentHash: this.chunker.hashContent(chunk.content),
embedding: embeddings[embeddingIndex],
embeddingModel: 'voyage-context-3',
embeddedAt: new Date(),
metadata: metadataMap[embeddingIndex],
searchMeta: {
clickCount: 0,
boostFactor: 1.0,
},
};
documents.push(vectorDoc);
embeddingIndex++;
}
}
return documents;
}
/**
* Insert vector documents into MongoDB Atlas
*/
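// Note: searching these documents with Atlas Vector Search assumes a vector index on the
// `embedding` field. A minimal index-definition sketch (the index itself is not created in
// this file, so its name and setup are assumptions):
//   {
//     "fields": [
//       { "type": "vector", "path": "embedding", "numDimensions": 2048, "similarity": "cosine" }
//     ]
//   }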
async insertToMongoDB(documents) {
const collection = this.mongodb.getVectorsCollection();
try {
// Use unordered insert to continue on duplicate errors
const result = await collection.insertMany(documents, {
ordered: false,
});
console.error(`  Inserted ${result.insertedCount} documents to MongoDB`);
}
catch (error) {
if (error.code === 11000) {
// Duplicate key error - some documents already exist
console.error('  Some documents already exist, updating...');
await this.updateExistingDocuments(documents);
}
else {
throw error;
}
}
}
/**
* Update existing documents with new embeddings
*/
async updateExistingDocuments(documents) {
const collection = this.mongodb.getVectorsCollection();
let updated = 0;
for (const doc of documents) {
try {
await collection.replaceOne({ contentHash: doc.contentHash }, doc, { upsert: true });
updated++;
}
catch (error) {
console.error('Failed to update document:', error);
}
}
console.error(`  Updated ${updated} existing documents`);
}
/**
* Create batches of documents respecting Voyage API limits
*/
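// Worked example with hypothetical numbers: at dynamicBatchSize = 8, twenty documents of
// ~5,000 tokens each stay well under MAX_TOTAL_TOKENS, so the document-count cap is hit
// first and the result is three batches of 8, 8, and 4 documents.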
createBatches(chunkedDocs) {
const batches = [];
let currentBatch = [];
let currentTokens = 0;
let currentChunks = 0;
for (const doc of chunkedDocs) {
// Sum the pre-computed token counts from each chunk's metadata
const docTokens = doc.chunks.reduce((sum, chunk) => sum + chunk.metadata.tokenCount, 0);
const docChunks = doc.chunks.length;
// Check if adding this document would exceed limits
if (currentBatch.length >= this.dynamicBatchSize ||
currentTokens + docTokens > this.MAX_TOTAL_TOKENS ||
currentChunks + docChunks > this.MAX_TOTAL_CHUNKS) {
// Save current batch
if (currentBatch.length > 0) {
batches.push({
documents: currentBatch,
totalTokens: currentTokens,
totalChunks: currentChunks,
});
}
// Start new batch
currentBatch = [];
currentTokens = 0;
currentChunks = 0;
}
// Add document to current batch
currentBatch.push(doc);
currentTokens += docTokens;
currentChunks += docChunks;
}
// Save final batch
if (currentBatch.length > 0) {
batches.push({
documents: currentBatch,
totalTokens: currentTokens,
totalChunks: currentChunks,
});
}
return batches;
}
/**
* Embed a single query for search using contextualized endpoint
*/
async embedQuery(query) {
try {
const response = await axios.post(this.voyageContextualUrl, {
inputs: [[query]], // Single query wrapped as a one-document, one-chunk nested array
input_type: 'query', // Important: query type for asymmetric search
model: 'voyage-context-3',
output_dimension: this.VOYAGE_DIMENSIONS
}, {
headers: {
'Authorization': `Bearer ${this.voyageApiKey}`,
'Content-Type': 'application/json',
},
timeout: 30000,
});
if (!response.data?.data?.[0]?.data?.[0]?.embedding) {
throw new Error('No embedding returned for query');
}
// Extract embedding from nested structure
const embedding = response.data.data[0].data[0].embedding;
// Normalize for cosine similarity
const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
return embedding.map((v) => v / magnitude);
}
catch (error) {
console.error('Failed to embed query:', error.response?.data || error);
throw error;
}
}
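// The vector returned by embedQuery above is intended for Atlas Vector Search. A usage
// sketch (index name, collection handle, and the numCandidates/limit values are
// illustrative assumptions, not defined in this file):
//   const queryVector = await pipeline.embedQuery('create a compound index');
//   const results = await collection.aggregate([
//     { $vectorSearch: { index: 'vector_index', path: 'embedding', queryVector, numCandidates: 100, limit: 10 } },
//   ]).toArray();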
sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
}
//# sourceMappingURL=embedding-pipeline.js.map