embedocs-mcp
Transform any GitHub repository into searchable vector embeddings. MCP server with smart indexing, voyage-context-3 embeddings, and semantic search for Claude/Cursor IDEs.
263 lines • 11.7 kB
JavaScript
/**
* SINGLE Embedding Service - The ONLY place that generates embeddings
* No more duplicate implementations!
*/
import axios from 'axios';
import { config } from '../config/index.js';
export class EmbeddingService {
static instance;
apiKey;
constructor() {
this.apiKey = process.env.VOYAGE_API_KEY;
if (!this.apiKey) {
throw new Error('VOYAGE_API_KEY is required');
}
}
static getInstance() {
if (!this.instance) {
this.instance = new EmbeddingService();
}
return this.instance;
}
/**
* Generate embeddings for documents - FOLLOWS voyage-ai/voyageai-python implementation
* @see https://github.com/voyage-ai/voyageai-python/blob/main/tests/test_client.py
*/
async embedDocuments(texts) {
if (texts.length === 0)
return [];
// voyage-context-3 requires smaller batches due to 32,000 token limit
// Documentation: voyage-ai/langchain-voyageai/libs/voyageai/langchain_voyageai/embeddings.py
if (config.embedding.model.includes('context')) {
// For context models, use VERY small batches to stay under token limit
const maxContextBatchSize = 1; // Process one chunk at a time for safety
const batches = this.createBatches(texts, maxContextBatchSize);
const allResults = [];
for (const batch of batches) {
const results = await this.processContextBatch(batch, 'document');
allResults.push(...results);
// Rate limiting from config
await this.delay(config.embedding.rateLimit);
}
return allResults;
}
else {
// For non-context models, batch normally
const batches = this.createBatches(texts, config.embedding.maxBatchSize);
const allResults = [];
for (const batch of batches) {
const results = await this.processBatch(batch, 'document');
allResults.push(...results);
// Rate limiting from config
await this.delay(config.embedding.rateLimit);
}
return allResults;
}
}
/**
* Generate embeddings for queries - FOLLOWS voyage-ai/voyageai-python implementation
* @see https://github.com/voyage-ai/langchain-voyageai/blob/main/libs/voyageai/langchain_voyageai/embeddings.py#L142
*/
async embedQuery(text) {
if (config.embedding.model.includes('context')) {
// Context models need double-wrapped array
const results = await this.processContextBatch([text], 'query');
return results[0];
}
else {
const results = await this.processBatch([text], 'query');
return results[0];
}
}
/**
* Process contextualized embeddings - Based on voyage-ai/langchain-voyageai
* @see https://github.com/voyage-ai/langchain-voyageai/blob/main/libs/voyageai/langchain_voyageai/embeddings.py#L111
*/
async processContextBatch(texts, inputType) {
let retries = 0;
// FINAL SAFETY NET: Validate all texts before API call
// NOTE: oversized texts are dropped from the batch (not truncated), so upstream chunking must keep chunks under the limit
const validTexts = texts.filter(text => {
const tokenCount = this.estimateTokens(text);
if (tokenCount > 30000) {
console.error(`❌ CRITICAL: Text still exceeds token limit (${tokenCount} tokens), dropping it from this batch`);
return false;
}
return true;
});
if (validTexts.length !== texts.length) {
console.warn(`⚠️ Filtered ${texts.length - validTexts.length} oversized texts to prevent API errors`);
}
// CRITICAL: Handle case where all texts are filtered out
if (validTexts.length === 0) {
console.warn(`⚠️ All texts filtered out due to size limits, returning empty embeddings`);
return texts.map(() => ({
embedding: new Array(config.embedding.dimensions).fill(0),
normalized: new Array(config.embedding.dimensions).fill(0),
dimensions: config.embedding.dimensions,
model: 'filtered-oversized'
}));
}
while (retries < config.embedding.retries) {
try {
// CRITICAL: voyage-context-3 requires double-wrapped array
// Documentation confirms: inputs should be [[texts]] for contextualized
const response = await axios.post(config.embedding.apiUrl, {
inputs: [validTexts], // Use validated texts
input_type: inputType,
model: config.embedding.model,
output_dimension: config.embedding.dimensions
}, {
headers: {
'Authorization': `Bearer ${this.apiKey}`,
'Content-Type': 'application/json',
},
timeout: config.embedding.timeout,
});
// Parse contextualized response - Based on actual API structure
// From test files: response.data.data[0].data[0].embedding
if (response.data?.data?.[0]?.data) {
const embeddings = response.data.data[0].data.map((item) => item.embedding);
return embeddings.map((embedding) => ({
embedding,
normalized: this.normalize(embedding),
dimensions: embedding.length,
model: config.embedding.model
}));
}
else if (response.data?.embeddings) {
// Fallback structure
const embeddings = response.data.embeddings;
return embeddings.map((embedding) => ({
embedding,
normalized: this.normalize(embedding),
dimensions: embedding.length,
model: config.embedding.model
}));
}
console.error('Unexpected response structure:', JSON.stringify(response.data, null, 2));
throw new Error('Invalid response structure from contextualized API');
}
catch (error) {
retries++;
// NETWORK ERROR DETECTION - handles ENOTFOUND api.voyageai.com
const isNetworkError = error.code === 'ENOTFOUND' ||
error.code === 'ECONNRESET' ||
error.code === 'ETIMEDOUT' ||
error.message?.includes('ENOTFOUND') ||
error.message?.includes('ECONNRESET');
// Log detailed error for debugging
if (error.response?.data) {
console.error('API Error Response:', JSON.stringify(error.response.data, null, 2));
}
else if (isNetworkError) {
console.warn(`🌐 Network error: ${error.code || error.message}`);
}
if (retries >= config.embedding.retries) {
throw new Error(`Context embedding failed after ${config.embedding.retries} retries: ${error.message}`);
}
// Exponential backoff for network errors, linear for API errors
if (isNetworkError) {
const delay = Math.min(1000 * Math.pow(2, retries), 30000); // Max 30s
console.warn(`🌐 Network retry ${retries}/${config.embedding.retries} in ${delay}ms...`);
await this.delay(delay);
}
else {
await this.delay(1000 * retries);
}
}
}
// Fallback: only reached if the retry loop never executes (e.g. retries configured as 0)
return [];
}
/**
* Process regular (non-contextualized) embeddings
*/
async processBatch(texts, inputType) {
let retries = 0;
while (retries < config.embedding.retries) {
try {
// Regular embedding endpoint (not used for voyage-context-3)
const response = await axios.post('https://api.voyageai.com/v1/embeddings', // Regular endpoint
{
input: texts,
model: config.embedding.model,
input_type: inputType,
output_dimension: config.embedding.dimensions
}, {
headers: {
'Authorization': `Bearer ${this.apiKey}`,
'Content-Type': 'application/json',
},
timeout: config.embedding.timeout,
});
// Parse regular response
if (response.data?.data) {
const embeddings = response.data.data.map((item) => item.embedding);
return embeddings.map((embedding) => ({
embedding,
normalized: this.normalize(embedding),
dimensions: embedding.length,
model: config.embedding.model
}));
}
throw new Error('Invalid response structure from regular API');
}
catch (error) {
retries++;
// NETWORK ERROR DETECTION - handles ENOTFOUND api.voyageai.com
const isNetworkError = error.code === 'ENOTFOUND' ||
error.code === 'ECONNRESET' ||
error.code === 'ETIMEDOUT' ||
error.message?.includes('ENOTFOUND') ||
error.message?.includes('ECONNRESET');
if (isNetworkError) {
console.warn(`🌐 Network error: ${error.code || error.message}`);
}
if (retries >= config.embedding.retries) {
throw new Error(`Embedding failed after ${config.embedding.retries} retries: ${error.message}`);
}
// Exponential backoff for network errors, linear for API errors
if (isNetworkError) {
const delay = Math.min(1000 * Math.pow(2, retries), 30000); // Max 30s
console.warn(`🌐 Network retry ${retries}/${config.embedding.retries} in ${delay}ms...`);
await this.delay(delay);
}
else {
await this.delay(1000 * retries);
}
}
}
// Fallback: only reached if the retry loop never executes (e.g. retries configured as 0)
return [];
}
/**
* Normalize a vector to unit length (L2 norm) so cosine similarity reduces to a dot product
*/
normalize(embedding) {
const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
return magnitude > 0 ? embedding.map(v => v / magnitude) : embedding;
}
/**
* Split items into consecutive batches of at most batchSize elements
*/
createBatches(items, batchSize) {
const batches = [];
for (let i = 0; i < items.length; i += batchSize) {
batches.push(items.slice(i, i + batchSize));
}
return batches;
}
delay(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
/**
* Quick token estimation for safety checks
*/
estimateTokens(text) {
// Conservative estimate: 1 token per 3.5 characters
return Math.ceil(text.length / 3.5);
}
/**
* Get current configuration (for debugging)
*/
getConfig() {
return {
model: config.embedding.model,
dimensions: config.embedding.dimensions,
maxBatchSize: config.embedding.maxBatchSize
};
}
}
//# sourceMappingURL=embeddings.js.map
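Below is a minimal usage sketch, not part of the shipped file: it assumes VOYAGE_API_KEY is set in the environment (the constructor throws otherwise) and that the module is imported from inside the package so ../config/index.js resolves; the import path and the chunk strings are placeholders.

// Hypothetical usage sketch — import path and sample texts are assumptions.
import { EmbeddingService } from './services/embeddings.js';

const embedder = EmbeddingService.getInstance();

// Embed a couple of placeholder document chunks.
const docs = await embedder.embedDocuments([
  'embedocs-mcp turns a GitHub repository into searchable vector embeddings.',
  'Semantic search is backed by voyage-context-3 contextualized embeddings.',
]);

// Embed a query; because the service pre-normalizes vectors,
// cosine similarity is just a dot product over the `normalized` arrays.
const query = await embedder.embedQuery('how is a repository indexed?');
const dot = (a, b) => a.reduce((sum, v, i) => sum + v * b[i], 0);
const best = docs
  .map((d, i) => ({ index: i, score: dot(query.normalized, d.normalized) }))
  .sort((a, b) => b.score - a.score)[0];
console.log(`best chunk: ${best.index} (score ${best.score.toFixed(3)})`);

The sketch ranks chunks client-side only to illustrate what the returned objects contain (embedding, normalized, dimensions, model); in the actual MCP server, similarity search is expected to happen in the vector store rather than in application code.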