UNPKG

@knath2000/codebase-indexing-mcp

Version:

MCP server for codebase indexing with Voyage AI embeddings and Qdrant vector storage

444 lines (443 loc) 12.8 kB
import { z } from 'zod';

// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

/**
 * Zod object schema describing the server configuration.
 *
 * Reading the generic arguments below:
 * - `voyageApiKey` is the only key that is required on input.
 * - Keys wrapped in `z.ZodDefault` are optional on input and always present
 *   on the parsed output.
 * - Keys wrapped in `z.ZodOptional` may be absent on both sides.
 * - Unknown keys are handled with the `"strip"` policy.
 */
export declare const ConfigSchema: z.ZodObject<
  {
    voyageApiKey: z.ZodString;
    qdrantUrl: z.ZodDefault<z.ZodString>;
    qdrantApiKey: z.ZodOptional<z.ZodString>;
    collectionName: z.ZodDefault<z.ZodString>;
    embeddingModel: z.ZodDefault<
      z.ZodEnum<["voyage-code-3", "voyage-3.5", "voyage-3-large", "voyage-code-2", "voyage-2", "voyage-large-2"]>
    >;
    batchSize: z.ZodDefault<z.ZodNumber>;
    chunkSize: z.ZodDefault<z.ZodNumber>;
    chunkOverlap: z.ZodDefault<z.ZodNumber>;
    maxFileSize: z.ZodDefault<z.ZodNumber>;
    excludePatterns: z.ZodDefault<z.ZodArray<z.ZodString, "many">>;
    supportedExtensions: z.ZodDefault<z.ZodArray<z.ZodString, "many">>;
    enableHybridSearch: z.ZodDefault<z.ZodBoolean>;
    enableLLMReranking: z.ZodDefault<z.ZodBoolean>;
    llmRerankerModel: z.ZodDefault<z.ZodString>;
    llmRerankerApiKey: z.ZodOptional<z.ZodString>;
    llmRerankerTimeoutMs: z.ZodDefault<z.ZodNumber>;
    llmRerankerBaseUrl: z.ZodOptional<z.ZodString>;
    llmRerankerProjectId: z.ZodOptional<z.ZodString>;
    keywordSearchTimeoutMs: z.ZodDefault<z.ZodNumber>;
    keywordSearchMaxChunks: z.ZodDefault<z.ZodNumber>;
    searchCacheTTL: z.ZodDefault<z.ZodNumber>;
    contextWindowSize: z.ZodDefault<z.ZodNumber>;
    maxContextChunks: z.ZodDefault<z.ZodNumber>;
    hybridSearchAlpha: z.ZodDefault<z.ZodNumber>;
    fileWatchDebounceMs: z.ZodDefault<z.ZodNumber>;
    mcpSchemaVersion: z.ZodDefault<z.ZodString>;
  },
  "strip",
  z.ZodTypeAny,
  // Output shape (after parsing, defaults applied).
  {
    voyageApiKey: string;
    qdrantUrl: string;
    collectionName: string;
    embeddingModel: "voyage-code-3" | "voyage-3.5" | "voyage-3-large" | "voyage-code-2" | "voyage-2" | "voyage-large-2";
    batchSize: number;
    chunkSize: number;
    chunkOverlap: number;
    maxFileSize: number;
    excludePatterns: string[];
    supportedExtensions: string[];
    enableHybridSearch: boolean;
    enableLLMReranking: boolean;
    llmRerankerModel: string;
    llmRerankerTimeoutMs: number;
    keywordSearchTimeoutMs: number;
    keywordSearchMaxChunks: number;
    searchCacheTTL: number;
    contextWindowSize: number;
    maxContextChunks: number;
    hybridSearchAlpha: number;
    fileWatchDebounceMs: number;
    mcpSchemaVersion: string;
    qdrantApiKey?: string | undefined;
    llmRerankerApiKey?: string | undefined;
    llmRerankerBaseUrl?: string | undefined;
    llmRerankerProjectId?: string | undefined;
  },
  // Input shape (what callers may pass to the schema).
  {
    voyageApiKey: string;
    qdrantUrl?: string | undefined;
    qdrantApiKey?: string | undefined;
    collectionName?: string | undefined;
    embeddingModel?:
      | "voyage-code-3"
      | "voyage-3.5"
      | "voyage-3-large"
      | "voyage-code-2"
      | "voyage-2"
      | "voyage-large-2"
      | undefined;
    batchSize?: number | undefined;
    chunkSize?: number | undefined;
    chunkOverlap?: number | undefined;
    maxFileSize?: number | undefined;
    excludePatterns?: string[] | undefined;
    supportedExtensions?: string[] | undefined;
    enableHybridSearch?: boolean | undefined;
    enableLLMReranking?: boolean | undefined;
    llmRerankerModel?: string | undefined;
    llmRerankerApiKey?: string | undefined;
    llmRerankerTimeoutMs?: number | undefined;
    llmRerankerBaseUrl?: string | undefined;
    llmRerankerProjectId?: string | undefined;
    keywordSearchTimeoutMs?: number | undefined;
    keywordSearchMaxChunks?: number | undefined;
    searchCacheTTL?: number | undefined;
    contextWindowSize?: number | undefined;
    maxContextChunks?: number | undefined;
    hybridSearchAlpha?: number | undefined;
    fileWatchDebounceMs?: number | undefined;
    mcpSchemaVersion?: string | undefined;
  }
>;

/** Parsed configuration type, i.e. the output shape of {@link ConfigSchema}. */
export type Config = z.infer<typeof ConfigSchema>;

// ---------------------------------------------------------------------------
// Chunks
// ---------------------------------------------------------------------------

/**
 * One chunk of file content together with where it came from
 * (`filePath`, `startLine`..`endLine`), how it is classified (`chunkType`)
 * and per-file metadata.
 */
export interface CodeChunk {
  id: string;
  content: string;
  filePath: string;
  language: string;
  startLine: number;
  endLine: number;
  chunkType: ChunkType;
  functionName?: string | undefined;
  className?: string | undefined;
  moduleName?: string | undefined;
  metadata: ChunkMetadata;
  contentHash: string;
  astNodeType?: string;
  parentChunkId?: string;
  childChunkIds?: string[];
  complexity?: number;
  tokenCount?: number;
}

/**
 * String-valued enum of chunk classifications.
 *
 * This is an ambient declaration: no runtime object is emitted from this file.
 */
export declare enum ChunkType {
  FUNCTION = "function",
  CLASS = "class",
  MODULE = "module",
  INTERFACE = "interface",
  TYPE = "type",
  VARIABLE = "variable",
  IMPORT = "import",
  COMMENT = "comment",
  GENERIC = "generic",
  METHOD = "method",
  PROPERTY = "property",
  CONSTRUCTOR = "constructor",
  ENUM = "enum",
  NAMESPACE = "namespace",
  DECORATOR = "decorator",
  SECTION = "section",
  CODE_BLOCK = "code_block",
  PARAGRAPH = "paragraph",
  LIST = "list",
  TABLE = "table",
  BLOCKQUOTE = "blockquote",
}

/** File-level facts attached to every chunk, plus optional analysis hints. */
export interface ChunkMetadata {
  fileSize: number;
  lastModified: number;
  language: string;
  extension: string;
  relativePath: string;
  isTest: boolean;
  complexity?: number;
  dependencies?: string[];
  exports?: string[];
  imports?: string[];
  isRecentlyModified?: boolean;
  isCurrentlyOpen?: boolean;
  editDistance?: number;
  semanticParent?: string;
}

// ---------------------------------------------------------------------------
// Embeddings
// ---------------------------------------------------------------------------

/** An embedding record carrying a dense vector and, optionally, a sparse one. */
export interface MultiVectorEmbedding {
  id: string;
  denseVector: number[];
  sparseVector?: SparseVector;
  payload: EmbeddingPayload;
}

/** Sparse vector expressed as two number arrays, `indices` and `values`. */
export interface SparseVector {
  indices: number[];
  values: number[];
}

/** An embedding record carrying a single vector and its payload. */
export interface EmbeddingVector {
  id: string;
  vector: number[];
  payload: EmbeddingPayload;
}

/**
 * Payload stored alongside an embedding. Mirrors most of {@link CodeChunk}
 * (without `id`), requires `tokenCount` and `fileKind`, and accepts arbitrary
 * additional keys through the index signature.
 */
export interface EmbeddingPayload {
  content: string;
  filePath: string;
  language: string;
  chunkType: ChunkType;
  startLine: number;
  endLine: number;
  functionName?: string | undefined;
  className?: string | undefined;
  moduleName?: string | undefined;
  metadata: ChunkMetadata;
  contentHash: string;
  tokenCount: number;
  astNodeType?: string;
  parentChunkId?: string;
  childChunkIds?: string[];
  complexity?: number;
  fileKind: 'code' | 'docs';
  [key: string]: unknown;
}

// ---------------------------------------------------------------------------
// Search
// ---------------------------------------------------------------------------

/** Search request: the query text plus optional filters and tuning flags. */
export interface SearchQuery {
  query: string;
  language?: string;
  chunkType?: ChunkType;
  filePath?: string;
  limit?: number;
  threshold?: number;
  enableHybrid?: boolean;
  enableReranking?: boolean;
  llmRerankerTimeoutMs?: number;
  /** Maximum number of results to keep per file type (function/class/etc.). */
  maxFilesPerType?: number;
  /** Boost functions when true (overrides automatic heuristics). */
  preferFunctions?: boolean;
  /** Boost classes when true (overrides automatic heuristics). */
  preferClasses?: boolean;
  /** Prefer implementation code over documentation (default true). */
  preferImplementation?: boolean;
}

/**
 * A pointer to a span of a file: `path`, a `[number, number]` line pair and
 * the snippet text. The `type` field is always the literal `'code_reference'`.
 */
export interface CodeReference {
  type: 'code_reference';
  path: string;
  lines: [number, number];
  snippet: string;
  score?: number;
  chunkType?: ChunkType;
  language?: string;
  metadata?: {
    functionName?: string;
    className?: string;
    complexity?: number;
    isTest?: boolean;
  };
}

/** One search hit: the matched chunk, its score and optional score breakdowns. */
export interface SearchResult {
  id: string;
  score: number;
  chunk: CodeChunk;
  snippet: string;
  context?: string | undefined;
  rerankedScore?: number;
  hybridScore?: {
    dense: number;
    sparse?: number;
    combined: number;
  };
  codeReference?: CodeReference;
}

/** Dense, optional sparse, and combined result lists together with `alpha`. */
export interface HybridSearchResult {
  denseResults: SearchResult[];
  sparseResults?: SearchResult[];
  combinedResults: SearchResult[];
  alpha: number;
}

/** Input to the LLM reranker: the query, candidate hits and a result cap. */
export interface LLMRerankerRequest {
  query: string;
  candidates: SearchResult[];
  maxResults: number;
}

/** Output of the LLM reranker: reordered hits plus optional explanation data. */
export interface LLMRerankerResponse {
  rerankedResults: SearchResult[];
  reasoning?: string;
  confidence?: number;
}

// ---------------------------------------------------------------------------
// Indexing
// ---------------------------------------------------------------------------

/** Snapshot of an indexing run: counters, current file, status and errors. */
export interface IndexingProgress {
  totalFiles: number;
  processedFiles: number;
  totalChunks: number;
  processedChunks: number;
  currentFile: string;
  status: IndexingStatus;
  startTime: Date;
  estimatedTimeRemaining?: number;
  errors: IndexingError[];
  incrementalUpdates: number;
  skippedFiles: number;
  cacheHits: number;
}

/**
 * String-valued enum of indexing states.
 *
 * This is an ambient declaration: no runtime object is emitted from this file.
 */
export declare enum IndexingStatus {
  IDLE = "idle",
  SCANNING = "scanning",
  PARSING = "parsing",
  EMBEDDING = "embedding",
  STORING = "storing",
  COMPLETED = "completed",
  ERROR = "error",
  WATCHING = "watching",
  INCREMENTAL_UPDATE = "incremental_update",
}

/** A problem recorded for one file, with a timestamp and a severity level. */
export interface IndexingError {
  filePath: string;
  error: string;
  timestamp: Date;
  severity: 'warning' | 'error' | 'critical';
}

// ---------------------------------------------------------------------------
// Parsing
// ---------------------------------------------------------------------------

/** A node of a parsed tree; `children` makes the structure recursive. */
export interface ParsedNode {
  type: string;
  startPosition: Position;
  endPosition: Position;
  text: string;
  children?: ParsedNode[];
  name?: string;
  kind?: string;
  nodeId?: string;
  parentId?: string;
  depth: number;
  isExported?: boolean;
  isAsync?: boolean;
  visibility?: 'public' | 'private' | 'protected';
}

/** A `row` / `column` pair used for node start and end positions. */
export interface Position {
  row: number;
  column: number;
}

/** Per-language settings: extensions, grammar name and chunking strategies. */
export interface LanguageConfig {
  name: string;
  extensions: string[];
  grammar: string;
  chunkStrategies: ChunkStrategy[];
  keywords: string[];
  commentPatterns: string[];
  astNodeMappings: Record<string, ChunkType>;
  contextualChunking: boolean;
  supportsSparseSearch: boolean;
}

/** Associates a node type with a {@link ChunkType} and optional chunking options. */
export interface ChunkStrategy {
  nodeType: string;
  chunkType: ChunkType;
  nameExtractor?: (node: ParsedNode) => string;
  includeContext?: boolean;
  minSize?: number;
  maxSize?: number;
  preserveHierarchy?: boolean;
  includeSignature?: boolean;
  includeDocstring?: boolean;
  priority?: number;
}

// ---------------------------------------------------------------------------
// Statistics and health
// ---------------------------------------------------------------------------

/** Aggregate numbers describing the index. */
export interface IndexStats {
  totalFiles: number;
  totalChunks: number;
  totalSize: number;
  languageDistribution: Record<string, number>;
  chunkTypeDistribution: Partial<Record<ChunkType, number>>;
  lastIndexed: Date;
  indexingDuration: number;
  averageChunkSize: number;
  largestFile: string;
  errors: number;
  warnings: number;
  incrementalUpdates: number;
  cacheHitRate: number;
  averageComplexity: number;
  tokensIndexed: number;
  memoryUsage: number;
  searchQueriesServed: number;
  averageSearchLatency: number;
}

/** Aggregate numbers describing search activity, caches and backends. */
export interface SearchStats {
  totalQueries: number;
  averageLatency: number;
  cacheHitRate: number;
  hybridSearchUsage: number;
  llmRerankerUsage: number;
  topLanguages: Record<string, number>;
  topChunkTypes: Record<string, number>;
  errorRate: number;
  lastQuery: Date;
  totalChunks: number;
  embeddingModel: string;
  embeddingDimension: number;
  collectionStatus: string;
  searchCacheSize: number;
  searchCacheMemory: number;
  rerankerCacheSize: number;
  rerankerCacheMemory: number;
  llmRerankerAverageLatency: number;
  llmRerankerErrorRate: number;
  qdrantClientLatency: number;
}

/** Overall health report: a top-level status, per-service health and metrics. */
export interface HealthStatus {
  status: 'healthy' | 'degraded' | 'unhealthy';
  timestamp: Date;
  services: {
    qdrant: ServiceHealth;
    voyage: ServiceHealth;
    llmReranker?: ServiceHealth;
    fileWatcher: ServiceHealth;
  };
  metrics: {
    uptime: number;
    memoryUsage: number;
    cpuUsage?: number;
    diskUsage?: number;
  };
  version: string;
  mcpSchemaVersion: string;
}

/**
 * Health of a single service. Unlike {@link HealthStatus}, the status union
 * here also admits `'disabled'`.
 */
export interface ServiceHealth {
  status: 'healthy' | 'degraded' | 'unhealthy' | 'disabled';
  latency?: number;
  errorRate?: number;
  lastCheck: Date;
  message?: string;
}

// ---------------------------------------------------------------------------
// Caching and file watching
// ---------------------------------------------------------------------------

/** A cached result list for a query, with its timestamp, TTL and filters. */
export interface SearchCache {
  query: string;
  queryHash: string;
  results: SearchResult[];
  timestamp: Date;
  ttl: number;
  metadata: {
    language?: string;
    chunkType?: ChunkType;
    filePath?: string;
  };
}

/** A single file-system change: what happened, to which path, and when. */
export interface FileChangeEvent {
  type: 'created' | 'modified' | 'deleted' | 'renamed';
  path: string;
  timestamp: Date;
  size?: number;
  hash?: string;
}

/** A group of file change events sharing one batch id and a processed flag. */
export interface FileWatchBatch {
  events: FileChangeEvent[];
  batchId: string;
  timestamp: Date;
  processed: boolean;
}

// ---------------------------------------------------------------------------
// MCP and Voyage API shapes
// ---------------------------------------------------------------------------

/** Descriptor of a tool: name, description and an untyped input schema. */
export interface McpTool {
  name: string;
  description: string;
  inputSchema: any;
}

/** Shape of an embedding response: one entry per embedding plus token usage. */
export interface VoyageEmbeddingResponse {
  object: string;
  data: {
    object: string;
    embedding: number[];
    index: number;
  }[];
  model: string;
  usage: {
    total_tokens: number;
  };
}

/** Shape of an embedding request; `input` is one string or an array of them. */
export interface VoyageEmbeddingRequest {
  input: string | string[];
  model: string;
  input_type?: 'query' | 'document';
  truncation?: boolean;
  output_dimension?: number;
}

// ---------------------------------------------------------------------------
// Context budgeting
// ---------------------------------------------------------------------------

/** A set of code references with token accounting and a truncation flag. */
export interface ContextWindow {
  maxTokens: number;
  usedTokens: number;
  chunks: CodeReference[];
  truncated: boolean;
  summary?: string;
}

/** Four token counters: `total`, `reserved`, `available` and `used`. */
export interface TokenBudget {
  total: number;
  reserved: number;
  available: number;
  used: number;
}