/*
 * @knath2000/codebase-indexing-mcp
 * Version:
 * MCP server for codebase indexing with Voyage AI embeddings and Qdrant vector storage
 * 192 lines • 7.13 kB
 * JavaScript
 */
import { ConfigSchema } from './types.js';
import dotenv from 'dotenv';
// Load environment variables from a .env file as a module-level side effect,
// so the values are in place before loadConfig() reads process.env.
dotenv.config();
/** Glob patterns skipped during indexing when EXCLUDE_PATTERNS is unset or empty. */
const DEFAULT_EXCLUDE_PATTERNS = Object.freeze([
    'node_modules/**',
    'dist/**',
    'build/**',
    '*.git*',
    '*.log',
    '*.tmp',
    '*.temp',
    '*.cache',
    '*.lock',
    '*.min.js',
    '*.min.css',
    '*.map',
    // Binary and executable files
    '*.exe',
    '*.bin',
    '*.dll',
    '*.so',
    '*.dylib',
    '*.app',
    '*.deb',
    '*.rpm',
    '*.msi',
    // Archive files
    '*.zip',
    '*.tar',
    '*.gz',
    '*.bz2',
    '*.xz',
    '*.7z',
    '*.rar',
    '*.jar',
    '*.war',
    '*.ear',
    // Image files
    '*.jpg',
    '*.jpeg',
    '*.png',
    '*.gif',
    '*.bmp',
    '*.tiff',
    '*.tif',
    '*.webp',
    '*.svg',
    '*.ico',
    '*.icns',
    // Video files
    '*.mp4',
    '*.avi',
    '*.mov',
    '*.wmv',
    '*.flv',
    '*.webm',
    '*.mkv',
    '*.m4v',
    // Audio files
    '*.mp3',
    '*.wav',
    '*.flac',
    '*.aac',
    '*.ogg',
    '*.wma',
    '*.m4a',
    // Font files
    '*.ttf',
    '*.otf',
    '*.woff',
    '*.woff2',
    '*.eot',
    // Database files
    '*.db',
    '*.sqlite',
    '*.sqlite3',
    '*.mdb',
    // Document files (binary formats)
    '*.pdf',
    '*.doc',
    '*.docx',
    '*.xls',
    '*.xlsx',
    '*.ppt',
    '*.pptx',
    // Other binary formats
    '*.dmg',
    '*.iso',
    '*.img',
    '*.vdi',
    '*.vmdk',
    '*.qcow2',
]);

/** File extensions indexed when SUPPORTED_EXTENSIONS is unset or empty. */
const DEFAULT_SUPPORTED_EXTENSIONS = Object.freeze([
    '.ts', '.js', '.tsx', '.jsx',
    '.py', '.java', '.cpp', '.c', '.h',
    '.go', '.rs', '.cs', '.php',
    '.rb', '.swift', '.kt', '.scala',
    '.md', '.txt', '.json', '.yaml', '.yml',
    '.html', '.css', '.scss', '.less',
]);

/**
 * Read an integer setting from the environment, always parsing as base 10.
 *
 * @param {string} name - Environment variable name.
 * @param {string} fallback - Decimal string used when the variable is unset or empty.
 * @returns {number} The parsed integer, or NaN when the value is not numeric.
 */
function readIntEnv(name, fallback) {
    return Number.parseInt(process.env[name] || fallback, 10);
}

/**
 * Read a comma-separated list setting from the environment.
 * Entries are trimmed and blank entries dropped, so "a, b," yields ['a', 'b'].
 *
 * @param {string} name - Environment variable name.
 * @param {readonly string[]} defaults - Values used when no usable entry remains.
 * @returns {string[]} A fresh array the caller is free to mutate.
 */
function readListEnv(name, defaults) {
    const entries = (process.env[name] ?? '')
        .split(',')
        .map((entry) => entry.trim())
        .filter((entry) => entry !== '');
    return entries.length > 0 ? entries : [...defaults];
}

/**
 * Build the LLM reranker base URL.
 * When both LLM_RERANKER_BASE_URL and LLM_RERANKER_PROJECT_ID are set the result is
 * `<base>/<projectId>/v1`; otherwise the base URL is returned unchanged (possibly undefined).
 *
 * @returns {string | undefined}
 */
function buildRerankerBaseUrl() {
    const baseUrl = process.env.LLM_RERANKER_BASE_URL;
    const projectId = process.env.LLM_RERANKER_PROJECT_ID;
    if (!baseUrl || !projectId) {
        return baseUrl;
    }
    // Strip trailing slashes so "https://host/" does not become "https://host//<id>/v1".
    return `${baseUrl.replace(/\/+$/, '')}/${projectId}/v1`;
}

/**
 * Load configuration from environment variables.
 *
 * Every setting except VOYAGE_API_KEY has a default. The assembled object is passed
 * through ConfigSchema.parse (imported from ./types.js) before being returned.
 *
 * @returns {object} The configuration as returned by ConfigSchema.parse.
 * @throws {Error} When VOYAGE_API_KEY is missing or empty. Anything thrown by
 *   ConfigSchema.parse also propagates to the caller.
 */
export function loadConfig() {
    const config = {
        voyageApiKey: process.env.VOYAGE_API_KEY || '',
        qdrantUrl: process.env.QDRANT_URL || 'http://localhost:6333',
        qdrantApiKey: process.env.QDRANT_API_KEY,
        collectionName: process.env.COLLECTION_NAME || 'codebase',
        embeddingModel: process.env.EMBEDDING_MODEL || 'voyage-code-3',
        batchSize: readIntEnv('BATCH_SIZE', '100'),
        chunkSize: readIntEnv('CHUNK_SIZE', '800'), // Privacy-optimized: 800 chars max
        chunkOverlap: readIntEnv('CHUNK_OVERLAP', '100'), // Reduced overlap for privacy
        maxFileSize: readIntEnv('MAX_FILE_SIZE', '1048576'), // 1MB
        excludePatterns: readListEnv('EXCLUDE_PATTERNS', DEFAULT_EXCLUDE_PATTERNS),
        supportedExtensions: readListEnv('SUPPORTED_EXTENSIONS', DEFAULT_SUPPORTED_EXTENSIONS),
        llmRerankerTimeoutMs: readIntEnv('LLM_RERANKER_TIMEOUT_MS', '25000'),
        llmRerankerBaseUrl: buildRerankerBaseUrl(),
        llmRerankerProjectId: process.env.LLM_RERANKER_PROJECT_ID, // LangDB Project ID
        keywordSearchTimeoutMs: readIntEnv('KEYWORD_SEARCH_TIMEOUT_MS', '10000'),
        keywordSearchMaxChunks: readIntEnv('KEYWORD_SEARCH_MAX_CHUNKS', '20000'),
        hybridSearchAlpha: Number.parseFloat(process.env.HYBRID_SEARCH_ALPHA || '0.7'),
        // Reranking is on unless explicitly disabled; "FALSE" and " false " also disable it.
        enableLLMReranking: (process.env.ENABLE_LLM_RERANKING ?? '').trim().toLowerCase() !== 'false',
        llmRerankerApiKey: process.env.LLM_RERANKER_API_KEY,
        llmRerankerModel: process.env.LLM_RERANKER_MODEL || 'anthropic/claude-3-haiku-20240307',
    };
    // Validate configuration
    const validatedConfig = ConfigSchema.parse(config);
    if (!validatedConfig.voyageApiKey) {
        throw new Error('VOYAGE_API_KEY environment variable is required');
    }
    return validatedConfig;
}
/** Embedding model names accepted without a warning. */
const KNOWN_EMBEDDING_MODELS = Object.freeze([
    'voyage-code-3',
    'voyage-3.5',
    'voyage-3-large',
    'voyage-code-2',
    'voyage-2',
    'voyage-large-2',
]);

/**
 * Validate configuration.
 *
 * Checks are applied in order and the first violated constraint throws. An unknown
 * embedding model only produces a console warning. NaN values (e.g. from a failed
 * integer parse) are rejected; numeric fields that are left undefined are not checked.
 *
 * @param {object} config - Configuration object, e.g. as produced by loadConfig().
 * @returns {void}
 * @throws {Error} Describing the first invalid setting.
 */
export function validateConfig(config) {
    // NaN compares false against everything, so a bare `value <= 0` would let it through.
    const isNotPositive = (value) => Number.isNaN(value) || value <= 0;

    if (!config.voyageApiKey) {
        throw new Error('Voyage API key is required');
    }
    if (!config.qdrantUrl) {
        throw new Error('Qdrant URL is required');
    }
    if (isNotPositive(config.batchSize)) {
        throw new Error('Batch size must be greater than 0');
    }
    if (isNotPositive(config.chunkSize)) {
        throw new Error('Chunk size must be greater than 0');
    }
    // Privacy-focused chunk size validation
    if (config.chunkSize < 100) {
        throw new Error('Chunk size must be at least 100 characters for meaningful code context');
    }
    if (config.chunkSize > 1000) {
        throw new Error('Chunk size must not exceed 1000 characters for privacy protection');
    }
    if (Number.isNaN(config.chunkOverlap) || config.chunkOverlap < 0) {
        throw new Error('Chunk overlap must be a non-negative number');
    }
    if (config.chunkOverlap >= config.chunkSize) {
        throw new Error('Chunk overlap must be less than chunk size');
    }
    if (isNotPositive(config.maxFileSize)) {
        throw new Error('Max file size must be greater than 0');
    }
    if (Number.isNaN(config.llmRerankerTimeoutMs) || config.llmRerankerTimeoutMs < 10000) {
        throw new Error('LLM reranker timeout must be at least 10000 ms');
    }
    // Optional chaining turns a missing list into the same clear error instead of a TypeError.
    if (!config.supportedExtensions?.length) {
        throw new Error('At least one supported extension must be specified');
    }
    if (!KNOWN_EMBEDDING_MODELS.includes(config.embeddingModel)) {
        console.warn(`Warning: Embedding model ${config.embeddingModel} may not be supported`);
    }
}
/**
 * Print a human-readable summary of the configuration to stdout.
 *
 * Emits a heading followed by one console.log call per setting, in a fixed order.
 *
 * @param {object} config - Configuration object whose fields are printed.
 * @returns {void}
 */
export function printConfigSummary(config) {
    // Each value is read lazily, right before its line is printed, so a missing field
    // fails at the same point in the output as a sequence of plain log statements would.
    const summaryRows = [
        ['Qdrant URL', () => config.qdrantUrl],
        ['Collection', () => config.collectionName],
        ['Embedding Model', () => config.embeddingModel],
        ['Batch Size', () => config.batchSize],
        ['Chunk Size', () => config.chunkSize],
        ['Max File Size', () => `${config.maxFileSize} bytes`],
        ['Supported Extensions', () => config.supportedExtensions.join(', ')],
        ['LLM Reranker Timeout', () => `${config.llmRerankerTimeoutMs} ms`],
        ['LLM Reranker Base URL', () => config.llmRerankerBaseUrl],
        ['Exclude Patterns', () => config.excludePatterns.join(', ')],
        ['Keyword Search Timeout', () => `${config.keywordSearchTimeoutMs} ms`],
        ['Keyword Search Max Chunks', () => config.keywordSearchMaxChunks],
        ['Hybrid Search α', () => config.hybridSearchAlpha],
    ];
    console.log('Configuration Summary:');
    for (const [label, readValue] of summaryRows) {
        console.log(` ${label}: ${readValue()}`);
    }
}
//# sourceMappingURL=config.js.map