@knath2000/codebase-indexing-mcp
Version:
MCP server for codebase indexing with Voyage AI embeddings and Qdrant vector storage
444 lines (443 loc) • 12.8 kB
TypeScript
import { z } from 'zod';
/**
 * Zod object schema describing the server configuration.
 *
 * As declared here:
 * - `voyageApiKey` is the only key that is required on input.
 * - `qdrantApiKey`, `llmRerankerApiKey`, `llmRerankerBaseUrl` and
 *   `llmRerankerProjectId` are optional on both input and output.
 * - Every other key is wrapped in `ZodDefault`, so it may be omitted on
 *   input and is always present on the parsed output.
 * - The object uses the "strip" unknown-keys mode.
 *
 * The three type arguments after the shape are, in order: the unknown-keys
 * mode / catchall, the parsed OUTPUT type, and the accepted INPUT type.
 * The concrete default values are not visible in this declaration.
 */
export declare const ConfigSchema: z.ZodObject<{
voyageApiKey: z.ZodString;
qdrantUrl: z.ZodDefault<z.ZodString>;
qdrantApiKey: z.ZodOptional<z.ZodString>;
collectionName: z.ZodDefault<z.ZodString>;
embeddingModel: z.ZodDefault<z.ZodEnum<["voyage-code-3", "voyage-3.5", "voyage-3-large", "voyage-code-2", "voyage-2", "voyage-large-2"]>>;
batchSize: z.ZodDefault<z.ZodNumber>;
chunkSize: z.ZodDefault<z.ZodNumber>;
chunkOverlap: z.ZodDefault<z.ZodNumber>;
maxFileSize: z.ZodDefault<z.ZodNumber>;
excludePatterns: z.ZodDefault<z.ZodArray<z.ZodString, "many">>;
supportedExtensions: z.ZodDefault<z.ZodArray<z.ZodString, "many">>;
enableHybridSearch: z.ZodDefault<z.ZodBoolean>;
enableLLMReranking: z.ZodDefault<z.ZodBoolean>;
llmRerankerModel: z.ZodDefault<z.ZodString>;
llmRerankerApiKey: z.ZodOptional<z.ZodString>;
llmRerankerTimeoutMs: z.ZodDefault<z.ZodNumber>;
llmRerankerBaseUrl: z.ZodOptional<z.ZodString>;
llmRerankerProjectId: z.ZodOptional<z.ZodString>;
keywordSearchTimeoutMs: z.ZodDefault<z.ZodNumber>;
keywordSearchMaxChunks: z.ZodDefault<z.ZodNumber>;
searchCacheTTL: z.ZodDefault<z.ZodNumber>;
contextWindowSize: z.ZodDefault<z.ZodNumber>;
maxContextChunks: z.ZodDefault<z.ZodNumber>;
hybridSearchAlpha: z.ZodDefault<z.ZodNumber>;
fileWatchDebounceMs: z.ZodDefault<z.ZodNumber>;
mcpSchemaVersion: z.ZodDefault<z.ZodString>;
}, "strip", z.ZodTypeAny, {
voyageApiKey: string;
qdrantUrl: string;
collectionName: string;
embeddingModel: "voyage-code-3" | "voyage-3.5" | "voyage-3-large" | "voyage-code-2" | "voyage-2" | "voyage-large-2";
batchSize: number;
chunkSize: number;
chunkOverlap: number;
maxFileSize: number;
excludePatterns: string[];
supportedExtensions: string[];
enableHybridSearch: boolean;
enableLLMReranking: boolean;
llmRerankerModel: string;
llmRerankerTimeoutMs: number;
keywordSearchTimeoutMs: number;
keywordSearchMaxChunks: number;
searchCacheTTL: number;
contextWindowSize: number;
maxContextChunks: number;
hybridSearchAlpha: number;
fileWatchDebounceMs: number;
mcpSchemaVersion: string;
qdrantApiKey?: string | undefined;
llmRerankerApiKey?: string | undefined;
llmRerankerBaseUrl?: string | undefined;
llmRerankerProjectId?: string | undefined;
}, {
voyageApiKey: string;
qdrantUrl?: string | undefined;
qdrantApiKey?: string | undefined;
collectionName?: string | undefined;
embeddingModel?: "voyage-code-3" | "voyage-3.5" | "voyage-3-large" | "voyage-code-2" | "voyage-2" | "voyage-large-2" | undefined;
batchSize?: number | undefined;
chunkSize?: number | undefined;
chunkOverlap?: number | undefined;
maxFileSize?: number | undefined;
excludePatterns?: string[] | undefined;
supportedExtensions?: string[] | undefined;
enableHybridSearch?: boolean | undefined;
enableLLMReranking?: boolean | undefined;
llmRerankerModel?: string | undefined;
llmRerankerApiKey?: string | undefined;
llmRerankerTimeoutMs?: number | undefined;
llmRerankerBaseUrl?: string | undefined;
llmRerankerProjectId?: string | undefined;
keywordSearchTimeoutMs?: number | undefined;
keywordSearchMaxChunks?: number | undefined;
searchCacheTTL?: number | undefined;
contextWindowSize?: number | undefined;
maxContextChunks?: number | undefined;
hybridSearchAlpha?: number | undefined;
fileWatchDebounceMs?: number | undefined;
mcpSchemaVersion?: string | undefined;
}>;
/**
 * Parsed configuration object: the output type inferred from `ConfigSchema`,
 * i.e. keys that carry a default in the schema are non-optional here.
 */
export type Config = z.infer<typeof ConfigSchema>;
/**
 * One indexed chunk of a file: its text, its location in the file, the kind
 * of construct it represents, and bookkeeping data attached to it.
 */
export interface CodeChunk {
  /** Identifier of the chunk. */
  id: string;
  /** Text of the chunk. */
  content: string;
  /** Path of the file the chunk belongs to. */
  filePath: string;
  /** Language label for the chunk. */
  language: string;
  /** First line of the chunk. NOTE(review): 0- vs 1-based is not visible here — confirm. */
  startLine: number;
  /** Last line of the chunk. NOTE(review): inclusivity is not visible here — confirm. */
  endLine: number;
  /** Kind of construct the chunk represents. */
  chunkType: ChunkType;
  /** Function name associated with the chunk, if any. */
  functionName?: string | undefined;
  /** Class name associated with the chunk, if any. */
  className?: string | undefined;
  /** Module name associated with the chunk, if any. */
  moduleName?: string | undefined;
  /** File-level metadata carried along with the chunk. */
  metadata: ChunkMetadata;
  /** Hash of the chunk content. NOTE(review): hash algorithm is not visible here. */
  contentHash: string;
  /** AST node type the chunk was derived from, when recorded. */
  astNodeType?: string;
  /** Id of the parent chunk, when the chunk is part of a hierarchy. */
  parentChunkId?: string;
  /** Ids of child chunks, when the chunk is part of a hierarchy. */
  childChunkIds?: string[];
  /** Complexity figure for the chunk. NOTE(review): metric is not visible here. */
  complexity?: number;
  /** Token count for the chunk. NOTE(review): tokenizer is not visible here. */
  tokenCount?: number;
}
/**
 * String-valued enum tagging the kind of construct a chunk represents.
 *
 * This is an ambient (`declare`) enum: the declaration itself emits no
 * runtime code.
 *
 * NOTE(review): SECTION through BLOCKQUOTE look like document-structure
 * kinds rather than code constructs — confirm against the chunker.
 */
export declare enum ChunkType {
FUNCTION = "function",
CLASS = "class",
MODULE = "module",
INTERFACE = "interface",
TYPE = "type",
VARIABLE = "variable",
IMPORT = "import",
COMMENT = "comment",
GENERIC = "generic",
METHOD = "method",
PROPERTY = "property",
CONSTRUCTOR = "constructor",
ENUM = "enum",
NAMESPACE = "namespace",
DECORATOR = "decorator",
SECTION = "section",
CODE_BLOCK = "code_block",
PARAGRAPH = "paragraph",
LIST = "list",
TABLE = "table",
BLOCKQUOTE = "blockquote"
}
/**
 * File-level metadata attached to a chunk. The first six members are
 * required; the remainder are optional.
 */
export interface ChunkMetadata {
  /** Size of the file. NOTE(review): unit (presumably bytes) is not stated here. */
  fileSize: number;
  /** Last-modified time as a number. NOTE(review): epoch unit is not stated here. */
  lastModified: number;
  /** Language label of the file. */
  language: string;
  /** File extension. NOTE(review): whether it includes the leading dot is not visible here. */
  extension: string;
  /** Relative path of the file. NOTE(review): the base directory is not visible here. */
  relativePath: string;
  /** Whether the file is flagged as a test. */
  isTest: boolean;

  /** Complexity figure, when available. */
  complexity?: number;
  /** Dependency names, when available. */
  dependencies?: string[];
  /** Exported names, when available. */
  exports?: string[];
  /** Imported names, when available. */
  imports?: string[];
  /** Flag marking the file as recently modified. */
  isRecentlyModified?: boolean;
  /** Flag marking the file as currently open. */
  isCurrentlyOpen?: boolean;
  /** Edit-distance figure. NOTE(review): what it is measured against is not visible here. */
  editDistance?: number;
  /** Name or id of a semantic parent. NOTE(review): exact meaning is not visible here. */
  semanticParent?: string;
}
/**
 * An embedding record that carries a dense vector, an optional sparse
 * vector, and the payload stored alongside them.
 */
export interface MultiVectorEmbedding {
  /** Identifier of the record. */
  id: string;
  /** Dense embedding values. */
  denseVector: number[];
  /** Sparse representation, when one was produced. */
  sparseVector?: SparseVector;
  /** Data stored together with the vectors. */
  payload: EmbeddingPayload;
}
/**
 * Sparse vector expressed as two number arrays.
 *
 * NOTE(review): `indices[i]` presumably pairs with `values[i]`; the type does
 * not enforce equal lengths — confirm with the producer.
 */
export interface SparseVector {
  /** Positions of the stored entries. */
  indices: number[];
  /** Values of the stored entries. */
  values: number[];
}
/** An embedding record with a single vector and its stored payload. */
export interface EmbeddingVector {
  /** Identifier of the record. */
  id: string;
  /** Embedding values. */
  vector: number[];
  /** Data stored together with the vector. */
  payload: EmbeddingPayload;
}
/**
 * Payload stored alongside an embedding vector.
 *
 * Most members mirror those of `CodeChunk`; unlike there, `tokenCount` is
 * required, and `fileKind` is added. The trailing index signature admits
 * arbitrary extra keys, typed `unknown`.
 */
export interface EmbeddingPayload {
  /** Text of the chunk. */
  content: string;
  /** Path of the file the chunk belongs to. */
  filePath: string;
  /** Language label for the chunk. */
  language: string;
  /** Kind of construct the chunk represents. */
  chunkType: ChunkType;
  /** First line of the chunk. NOTE(review): 0- vs 1-based is not visible here — confirm. */
  startLine: number;
  /** Last line of the chunk. NOTE(review): inclusivity is not visible here — confirm. */
  endLine: number;
  /** Function name associated with the chunk, if any. */
  functionName?: string | undefined;
  /** Class name associated with the chunk, if any. */
  className?: string | undefined;
  /** Module name associated with the chunk, if any. */
  moduleName?: string | undefined;
  /** File-level metadata carried along with the chunk. */
  metadata: ChunkMetadata;
  /** Hash of the chunk content. */
  contentHash: string;
  /** Token count for the chunk (required on the payload). */
  tokenCount: number;
  /** AST node type the chunk was derived from, when recorded. */
  astNodeType?: string;
  /** Id of the parent chunk, when recorded. */
  parentChunkId?: string;
  /** Ids of child chunks, when recorded. */
  childChunkIds?: string[];
  /** Complexity figure, when recorded. */
  complexity?: number;
  /** Whether the originating file is classified as code or as documentation. */
  fileKind: 'code' | 'docs';
  /** Any additional keys; values must be narrowed before use. */
  [key: string]: unknown;
}
/**
 * Parameters of a search request. Only `query` is required; every other
 * member is an optional filter, limit, or ranking preference.
 */
export interface SearchQuery {
  /** The query text. */
  query: string;

  /** Language filter. */
  language?: string;
  /** Chunk-kind filter. */
  chunkType?: ChunkType;
  /** File-path filter. NOTE(review): exact vs. prefix/glob matching is not visible here. */
  filePath?: string;

  /** Limit on the number of results. */
  limit?: number;
  /** Score threshold. NOTE(review): scale and comparison direction are not visible here. */
  threshold?: number;

  /** Per-query switch for hybrid search. */
  enableHybrid?: boolean;
  /** Per-query switch for reranking. */
  enableReranking?: boolean;
  /** Per-query LLM reranker timeout, in milliseconds (per the `Ms` suffix). */
  llmRerankerTimeoutMs?: number;

  /** Cap on how many results are kept per file type (function/class/etc.). */
  maxFilesPerType?: number;
  /** When true, boosts functions; takes precedence over the automatic heuristics. */
  preferFunctions?: boolean;
  /** When true, boosts classes; takes precedence over the automatic heuristics. */
  preferClasses?: boolean;
  /** Favor implementation code over documentation (defaults to true). */
  preferImplementation?: boolean;
}
/**
 * A reference to a span of code, tagged with the literal discriminator
 * `'code_reference'`.
 */
export interface CodeReference {
  /** Literal tag identifying this object shape. */
  type: 'code_reference';
  /** Path of the referenced file. */
  path: string;
  /** Two-element line tuple. NOTE(review): presumably [start, end] — confirm order and base. */
  lines: [number, number];
  /** Text excerpt for the reference. */
  snippet: string;
  /** Score attached to the reference, when available. */
  score?: number;
  /** Kind of construct referenced, when available. */
  chunkType?: ChunkType;
  /** Language label, when available. */
  language?: string;
  /** Optional extra descriptors; every member is itself optional. */
  metadata?: {
    functionName?: string;
    className?: string;
    complexity?: number;
    isTest?: boolean;
  };
}
/** A single search hit: the matched chunk plus its scores and display text. */
export interface SearchResult {
  /** Identifier of the result. */
  id: string;
  /** Score of the hit. NOTE(review): scale is not visible here. */
  score: number;
  /** The matched chunk. */
  chunk: CodeChunk;
  /** Text excerpt for the hit. */
  snippet: string;
  /** Additional context text, when available. */
  context?: string | undefined;
  /** Score assigned by reranking, when available. */
  rerankedScore?: number;
  /** Score breakdown for hybrid search, when available; `sparse` may be absent. */
  hybridScore?: {
    dense: number;
    sparse?: number;
    combined: number;
  };
  /** The hit expressed as a `CodeReference`, when available. */
  codeReference?: CodeReference;
}
/**
 * Outcome of a hybrid search: the dense result list, an optional sparse
 * result list, the combined list, and the `alpha` value that was used.
 */
export interface HybridSearchResult {
  /** Results from the dense search. */
  denseResults: SearchResult[];
  /** Results from the sparse search, when one was run. */
  sparseResults?: SearchResult[];
  /** Results after combining. */
  combinedResults: SearchResult[];
  /** Mixing parameter. NOTE(review): which side it weights is not visible here — confirm. */
  alpha: number;
}
/** Input to the LLM reranker. */
export interface LLMRerankerRequest {
  /** The query text the candidates are ranked against. */
  query: string;
  /** Candidate results to rerank. */
  candidates: SearchResult[];
  /** Upper bound on the number of results requested. */
  maxResults: number;
}
/** Output of the LLM reranker. */
export interface LLMRerankerResponse {
  /** Results after reranking. */
  rerankedResults: SearchResult[];
  /** Free-text reasoning, when provided. */
  reasoning?: string;
  /** Confidence figure, when provided. NOTE(review): scale is not visible here. */
  confidence?: number;
}
/** Snapshot of an indexing run: counters, current state, and collected errors. */
export interface IndexingProgress {
  /** Number of files in the run. */
  totalFiles: number;
  /** Number of files processed so far. */
  processedFiles: number;
  /** Number of chunks in the run. */
  totalChunks: number;
  /** Number of chunks processed so far. */
  processedChunks: number;
  /** File currently being handled. */
  currentFile: string;
  /** Current phase of the run. */
  status: IndexingStatus;
  /** When the run started. */
  startTime: Date;
  /** Estimated time remaining, when available. NOTE(review): unit is not stated here. */
  estimatedTimeRemaining?: number;
  /** Errors recorded during the run. */
  errors: IndexingError[];
  /** Count of incremental updates. */
  incrementalUpdates: number;
  /** Count of skipped files. */
  skippedFiles: number;
  /** Count of cache hits. */
  cacheHits: number;
}
/**
 * String-valued enum naming the states an indexing run can report.
 *
 * This is an ambient (`declare`) enum: the declaration itself emits no
 * runtime code. The order of members below is declaration order only; no
 * state-transition rules are visible in this file.
 */
export declare enum IndexingStatus {
IDLE = "idle",
SCANNING = "scanning",
PARSING = "parsing",
EMBEDDING = "embedding",
STORING = "storing",
COMPLETED = "completed",
ERROR = "error",
WATCHING = "watching",
INCREMENTAL_UPDATE = "incremental_update"
}
/** One error recorded while indexing a file. */
export interface IndexingError {
  /** Path of the file the error relates to. */
  filePath: string;
  /** Error text. */
  error: string;
  /** When the error was recorded. */
  timestamp: Date;
  /** Severity level: one of three fixed string literals. */
  severity: 'warning' | 'error' | 'critical';
}
/**
 * A node of a parsed tree. Nodes nest through `children`; `type`,
 * `startPosition`, `endPosition`, `text` and `depth` are always present.
 */
export interface ParsedNode {
  /** Node type label. */
  type: string;
  /** Where the node starts. */
  startPosition: Position;
  /** Where the node ends. */
  endPosition: Position;
  /** Text covered by the node. */
  text: string;
  /** Child nodes, when present. */
  children?: ParsedNode[];
  /** Name associated with the node, when present. */
  name?: string;
  /** Kind label, when present. NOTE(review): how it differs from `type` is not visible here. */
  kind?: string;
  /** Identifier of the node, when assigned. */
  nodeId?: string;
  /** Identifier of the parent node, when assigned. */
  parentId?: string;
  /** Depth of the node. NOTE(review): whether the root is 0 or 1 is not visible here. */
  depth: number;
  /** Whether the node is marked as exported. */
  isExported?: boolean;
  /** Whether the node is marked as async. */
  isAsync?: boolean;
  /** Visibility modifier, when known. */
  visibility?: 'public' | 'private' | 'protected';
}
/**
 * A row/column location.
 *
 * NOTE(review): whether rows and columns are 0- or 1-based is not visible
 * here — confirm against the parser that produces them.
 */
export interface Position {
  /** Row of the location. */
  row: number;
  /** Column of the location. */
  column: number;
}
/** Per-language settings used when parsing and chunking files. */
export interface LanguageConfig {
  /** Name of the language. */
  name: string;
  /** File extensions associated with the language. */
  extensions: string[];
  /** Grammar reference. NOTE(review): whether this is a package name or a path is not visible here. */
  grammar: string;
  /** Chunking strategies defined for the language. */
  chunkStrategies: ChunkStrategy[];
  /** Keywords of the language. */
  keywords: string[];
  /** Comment patterns. NOTE(review): literal prefixes vs. regex sources is not visible here. */
  commentPatterns: string[];
  /** Lookup from AST node type name to chunk kind. */
  astNodeMappings: Record<string, ChunkType>;
  /** Whether contextual chunking is enabled for the language. */
  contextualChunking: boolean;
  /** Whether sparse search is supported for the language. */
  supportsSparseSearch: boolean;
}
/**
 * Rule associating a node type with the chunk kind to emit for it, plus
 * optional tuning knobs.
 */
export interface ChunkStrategy {
  /** Node type the rule applies to. */
  nodeType: string;
  /** Chunk kind associated with that node type. */
  chunkType: ChunkType;
  /** Optional callback that derives a name string from a node. */
  nameExtractor?: (node: ParsedNode) => string;
  /** Whether to include context. */
  includeContext?: boolean;
  /** Minimum size. NOTE(review): unit (chars, lines, tokens) is not stated here. */
  minSize?: number;
  /** Maximum size. NOTE(review): unit (chars, lines, tokens) is not stated here. */
  maxSize?: number;
  /** Whether to preserve hierarchy. */
  preserveHierarchy?: boolean;
  /** Whether to include the signature. */
  includeSignature?: boolean;
  /** Whether to include the docstring. */
  includeDocstring?: boolean;
  /** Priority of the rule. NOTE(review): ordering direction is not visible here. */
  priority?: number;
}
/**
 * Aggregate statistics about the index.
 *
 * NOTE(review): units for sizes, durations, latency and memory are not
 * stated in this declaration — confirm with the code that fills it in.
 */
export interface IndexStats {
  // Totals
  totalFiles: number;
  totalChunks: number;
  totalSize: number;

  // Distributions
  /** Count per language label. */
  languageDistribution: Record<string, number>;
  /** Count per chunk kind; kinds may be absent from the record. */
  chunkTypeDistribution: Partial<Record<ChunkType, number>>;

  // Last indexing run
  lastIndexed: Date;
  indexingDuration: number;
  averageChunkSize: number;
  largestFile: string;

  // Problem and update counters
  errors: number;
  warnings: number;
  incrementalUpdates: number;

  // Derived and runtime figures
  cacheHitRate: number;
  averageComplexity: number;
  tokensIndexed: number;
  memoryUsage: number;
  searchQueriesServed: number;
  averageSearchLatency: number;
}
/**
 * Aggregate statistics about search activity and the search backends.
 *
 * NOTE(review): units for latency, rates and memory are not stated in this
 * declaration — confirm with the code that fills it in.
 */
export interface SearchStats {
  // Query traffic
  totalQueries: number;
  averageLatency: number;
  cacheHitRate: number;
  hybridSearchUsage: number;
  llmRerankerUsage: number;
  /** Count per language label. */
  topLanguages: Record<string, number>;
  /** Count per chunk-type label (keyed by plain string). */
  topChunkTypes: Record<string, number>;
  errorRate: number;
  lastQuery: Date;

  // Collection and embedding model
  totalChunks: number;
  embeddingModel: string;
  embeddingDimension: number;
  collectionStatus: string;

  // Caches
  searchCacheSize: number;
  searchCacheMemory: number;
  rerankerCacheSize: number;
  rerankerCacheMemory: number;

  // Backend latency and error figures
  llmRerankerAverageLatency: number;
  llmRerankerErrorRate: number;
  qdrantClientLatency: number;
}
/**
 * Overall health report: a top-level status, per-service health entries,
 * process metrics, and version strings.
 */
export interface HealthStatus {
  /** Overall status: one of three fixed string literals. */
  status: 'healthy' | 'degraded' | 'unhealthy';
  /** When the report was produced. */
  timestamp: Date;
  /** Health of each dependency; the `llmReranker` entry may be absent. */
  services: {
    qdrant: ServiceHealth;
    voyage: ServiceHealth;
    llmReranker?: ServiceHealth;
    fileWatcher: ServiceHealth;
  };
  /** Process-level metrics. NOTE(review): units are not stated in this declaration. */
  metrics: {
    uptime: number;
    memoryUsage: number;
    cpuUsage?: number;
    diskUsage?: number;
  };
  /** Version string. */
  version: string;
  /** MCP schema version string. */
  mcpSchemaVersion: string;
}
/**
 * Health entry for a single service. In addition to the three overall
 * statuses, a service may also be reported as `'disabled'`.
 */
export interface ServiceHealth {
  /** Status of the service. */
  status: 'healthy' | 'degraded' | 'unhealthy' | 'disabled';
  /** Latency figure, when measured. NOTE(review): unit is not stated here. */
  latency?: number;
  /** Error-rate figure, when measured. NOTE(review): scale is not stated here. */
  errorRate?: number;
  /** When the service was last checked. */
  lastCheck: Date;
  /** Free-text detail, when provided. */
  message?: string;
}
/** A cached set of search results together with the query that produced it. */
export interface SearchCache {
  /** The query text. */
  query: string;
  /** Hash of the query. NOTE(review): what is hashed, and how, is not visible here. */
  queryHash: string;
  /** The cached results. */
  results: SearchResult[];
  /** When the entry was created. */
  timestamp: Date;
  /** Time-to-live of the entry. NOTE(review): unit is not stated here. */
  ttl: number;
  /** Filter values recorded with the entry; every member is optional. */
  metadata: {
    language?: string;
    chunkType?: ChunkType;
    filePath?: string;
  };
}
/** A single file-system change notification. */
export interface FileChangeEvent {
  /** What happened to the file: one of four fixed string literals. */
  type: 'created' | 'modified' | 'deleted' | 'renamed';
  /** Path of the affected file. NOTE(review): for 'renamed', old vs. new path is not visible here. */
  path: string;
  /** When the change was observed. */
  timestamp: Date;
  /** File size, when known. NOTE(review): unit is not stated here. */
  size?: number;
  /** File hash, when known. */
  hash?: string;
}
/** A group of file change events handled together. */
export interface FileWatchBatch {
  /** The events in the batch. */
  events: FileChangeEvent[];
  /** Identifier of the batch. */
  batchId: string;
  /** When the batch was created. */
  timestamp: Date;
  /** Whether the batch has been processed. */
  processed: boolean;
}
/** Descriptor of a tool exposed by the MCP server. */
export interface McpTool {
  /** Name of the tool. */
  name: string;
  /** Human-readable description of the tool. */
  description: string;
  /**
   * Schema of the tool's input.
   *
   * NOTE(review): typed `any`, so consumers get no checking on it; consider
   * narrowing this in the source module (e.g. to `unknown` or a JSON Schema
   * type) in a future major version.
   */
  inputSchema: any;
}
/**
 * Shape of an embeddings response from the Voyage API, as consumed by this
 * package. Member names use the wire format's snake_case.
 */
export interface VoyageEmbeddingResponse {
  /** Object-type tag of the response. */
  object: string;
  /** One entry per embedding; `index` is the entry's position field as returned by the API. */
  data: {
    object: string;
    embedding: number[];
    index: number;
  }[];
  /** Model name reported in the response. */
  model: string;
  /** Token accounting for the request. */
  usage: {
    total_tokens: number;
  };
}
/**
 * Shape of an embeddings request sent to the Voyage API. Member names use
 * the wire format's snake_case.
 */
export interface VoyageEmbeddingRequest {
  /** A single text or a batch of texts to embed. */
  input: string | string[];
  /** Name of the embedding model to use. */
  model: string;
  /** Whether the input is a search query or a document. */
  input_type?: 'query' | 'document';
  /** Truncation switch passed to the API. */
  truncation?: boolean;
  /** Requested dimensionality of the returned embeddings. */
  output_dimension?: number;
}
/** A token-bounded collection of code references. */
export interface ContextWindow {
  /** Token capacity of the window. */
  maxTokens: number;
  /** Tokens consumed so far. */
  usedTokens: number;
  /** The references held in the window. */
  chunks: CodeReference[];
  /** Whether content was truncated. */
  truncated: boolean;
  /** Summary text, when provided. */
  summary?: string;
}
/**
 * Token accounting expressed as four counters.
 *
 * NOTE(review): the relationship between the counters (e.g. whether
 * `available` equals `total - reserved - used`) is not enforced by the type
 * and is not visible here — confirm with the code that maintains it.
 */
export interface TokenBudget {
  /** Total token count. */
  total: number;
  /** Reserved token count. */
  reserved: number;
  /** Available token count. */
  available: number;
  /** Used token count. */
  used: number;
}