// codecrucible-synth
// Version:
// Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability
// 226 lines • 6.55 kB
// TypeScript
/**
* Vector-Based RAG System for CodeCrucible Synth
* Production-ready implementation with local-first architecture, LanceDB storage,
* and real-time incremental indexing optimized for code repositories
*/
import { EventEmitter } from 'events';
import { UnifiedModelClient } from '../../refactor/unified-model-client.js';
/**
 * A document handled by the RAG system: its text, its metadata, and
 * optionally its embedding and chunks.
 */
export interface VectorDocument {
  /** Identifier of the document; how it is derived is not visible in this file. */
  id: string;
  /** Text content of the document. */
  content: string;
  /** Optional embedding vector for the document. */
  embedding?: number[];
  /** Descriptive metadata attached to the document. */
  metadata: DocumentMetadata;
  /** Optional chunks associated with this document. */
  chunks?: DocumentChunk[];
}
/**
 * Metadata describing the file a document came from.
 */
export interface DocumentMetadata {
  /** Path of the source file. */
  filePath: string;
  /** Language label for the file. */
  language: string;
  /** File type label; the set of values used is not visible in this file. */
  fileType: string;
  /** Last modification time. */
  lastModified: Date;
  /** Size of the document; unit is not specified here (presumably bytes — TODO confirm). */
  size: number;
  /** Hash for the document; the algorithm and hashed input are not visible in this file. */
  hash: string;
  /** Optional repository name. */
  repository?: string;
  /** Optional branch name. */
  branch?: string;
  /** Optional author. */
  author?: string;
  /** Coarse category of the document. */
  semanticType: 'code' | 'documentation' | 'configuration' | 'test';
  /** Optional symbols extracted from the document. */
  extractedSymbols?: ExtractedSymbol[];
}
/**
 * A named symbol found in a document, with its line span.
 */
export interface ExtractedSymbol {
  /** Symbol name. */
  name: string;
  /** Kind of symbol. */
  type: 'function' | 'class' | 'interface' | 'variable' | 'constant';
  /** First line of the symbol; whether lines are 0- or 1-based is not specified here. */
  startLine: number;
  /** Last line of the symbol; inclusivity is not specified here. */
  endLine: number;
  /** Optional signature text. */
  signature?: string;
  /** Optional documentation text attached to the symbol. */
  docstring?: string;
}
/**
 * A slice of a document together with its embedding.
 */
export interface DocumentChunk {
  /** Identifier of the chunk. */
  id: string;
  /** Text content of the chunk. */
  content: string;
  /** Embedding vector for the chunk (required, unlike `VectorDocument.embedding`). */
  embedding: number[];
  /** Start offset of the chunk; unit (characters vs. bytes) is not specified here. */
  startOffset: number;
  /** End offset of the chunk; inclusivity is not specified here. */
  endOffset: number;
  /** Structural category of the chunk. */
  chunkType: 'function' | 'class' | 'block' | 'comment' | 'documentation';
  /** Reference to the owning document (presumably its `id` — TODO confirm). */
  parentDocument: string;
  /** Weight attached to the chunk; scale and usage are not visible in this file. */
  semanticWeight: number;
}
/**
 * A query submitted to the RAG system.
 */
export interface RAGQuery {
  /** Query text. */
  query: string;
  /** Retrieval mode requested for this query. */
  queryType: 'semantic' | 'exact' | 'hybrid';
  /** Optional filters to apply. */
  filters?: QueryFilter[];
  /** Optional cap on the number of results. */
  maxResults?: number;
  /** Optional score threshold; how it is applied is not visible in this file. */
  threshold?: number;
  /** Optional context-window setting; unit and effect are not visible in this file. */
  contextWindow?: number;
  /** Optional flag requesting metadata in the results. */
  includeMetadata?: boolean;
  /** Optional flag requesting reranking of the results. */
  rerank?: boolean;
}
/**
 * A single field/operator/value condition used to filter query results.
 */
export interface QueryFilter {
  /** Name of the field the condition applies to. */
  field: string;
  /** Comparison operator. */
  operator: 'equals' | 'contains' | 'startsWith' | 'in' | 'gt' | 'lt';
  /**
   * Operand for the comparison. Which value shape pairs with which operator
   * is not enforced by this type.
   */
  value: string | string[] | number;
}
/**
 * Result of a RAG query.
 */
export interface RAGResult {
  /** Scored documents returned for the query. */
  documents: ScoredDocument[];
  /** Total number of matches found. */
  totalFound: number;
  /** Time taken by the query; unit is not specified here (presumably ms — TODO confirm). */
  queryTime: number;
  /** Label of the retrieval method that produced the result. */
  retrievalMethod: string;
  /** Whether the results were reranked. */
  reranked: boolean;
  /** Optional diagnostic details. */
  debugInfo?: {
    /** Time spent in vector search; unit is not specified here. */
    vectorSearchTime: number;
    /** Time spent reranking; unit is not specified here. */
    rerankTime: number;
    /** Number of candidates considered. */
    candidatesConsidered: number;
  };
}
/**
 * A document paired with a relevance score.
 */
export interface ScoredDocument {
  /** The matched document. */
  document: VectorDocument;
  /** Score for the match; scale and ordering are not visible in this file. */
  score: number;
  /** Optional explanation of why the document was considered relevant. */
  relevanceExplanation?: string;
  /** Optional content with highlighting; the highlight format is not visible in this file. */
  highlightedContent?: string;
  /** Optional chunks of the document that matched. */
  matchedChunks?: DocumentChunk[];
}
/**
 * Contract for a model that turns text into embedding vectors.
 */
export interface EmbeddingModel {
  /** Model name. */
  name: string;
  /** Dimensionality reported by the model. */
  dimensions: number;
  /** Maximum token count reported by the model; how it is enforced is not visible in this file. */
  maxTokens: number;
  /**
   * Embeds one piece of text.
   * @param text - Text to embed.
   * @returns A promise for the embedding vector.
   */
  embed(text: string): Promise<number[]>;
  /**
   * Embeds several texts in one call.
   * @param texts - Texts to embed.
   * @returns A promise for one vector per input (presumably in input order — TODO confirm).
   */
  embedBatch(texts: string[]): Promise<number[][]>;
}
/**
 * Contract for a vector storage backend.
 *
 * Every operation is asynchronous. Failure behaviour is not specified by
 * this declaration.
 */
export interface VectorStore {
  /** Prepares the store for use. */
  initialize(): Promise<void>;
  /**
   * Adds documents to the store.
   * @param documents - Documents to add.
   */
  addDocuments(documents: VectorDocument[]): Promise<void>;
  /**
   * Updates a document in the store.
   * @param document - The document to update.
   */
  updateDocument(document: VectorDocument): Promise<void>;
  /**
   * Deletes a document.
   * @param id - Identifier of the document to delete.
   */
  deleteDocument(id: string): Promise<void>;
  /**
   * Searches by vector.
   * @param query - Query vector.
   * @param filters - Optional filters to apply.
   * @param maxResults - Optional cap on the number of results.
   * @returns A promise for the scored matches.
   */
  search(query: number[], filters?: QueryFilter[], maxResults?: number): Promise<ScoredDocument[]>;
  /**
   * Searches using both the query text and a vector.
   * @param query - Query text.
   * @param vector - Query vector.
   * @param filters - Optional filters to apply.
   * @returns A promise for the scored matches.
   */
  hybridSearch(query: string, vector: number[], filters?: QueryFilter[]): Promise<ScoredDocument[]>;
  /**
   * Looks up a document by id.
   * @param id - Identifier of the document.
   * @returns A promise for the document, or `null`.
   */
  getDocument(id: string): Promise<VectorDocument | null>;
  /** Returns statistics about the store. */
  getStats(): Promise<VectorStoreStats>;
  /** Compacts the store; what compaction entails depends on the implementation. */
  compact(): Promise<void>;
  /** Closes the store. */
  close(): Promise<void>;
}
/**
 * Statistics reported by a vector store.
 */
export interface VectorStoreStats {
  /** Number of documents in the store. */
  totalDocuments: number;
  /** Number of chunks in the store. */
  totalChunks: number;
  /** Size of the index; unit is not specified here. */
  indexSize: number;
  /** Memory usage; unit is not specified here. */
  memoryUsage: number;
  /** Time of the last update. */
  lastUpdated: Date;
  /** Average document size; unit is not specified here. */
  averageDocumentSize: number;
}
/**
 * Contract for splitting documents into chunks and extracting symbols.
 */
export interface CodeChunker {
  /**
   * Splits a document into chunks.
   * @param document - Document to split.
   * @returns A promise for the resulting chunks.
   */
  chunkDocument(document: VectorDocument): Promise<DocumentChunk[]>;
  /**
   * Extracts symbols from source text. Unlike `chunkDocument`, this is synchronous.
   * @param content - Source text.
   * @param language - Language label for the text.
   * @returns The symbols found.
   */
  extractSymbols(content: string, language: string): ExtractedSymbol[];
  /**
   * Decides whether a document needs to be reindexed.
   * @param oldMetadata - Previously recorded metadata.
   * @param newMetadata - Current metadata.
   * @returns `true` when reindexing is needed; the comparison criteria are up to the implementation.
   */
  shouldReindex(oldMetadata: DocumentMetadata, newMetadata: DocumentMetadata): boolean;
}
/**
 * Configuration for the RAG system, grouped by concern.
 *
 * No defaults are declared in this file; every field is required.
 */
export interface RAGConfig {
  /** Vector storage settings. */
  vectorStore: {
    /** Storage backend to use. */
    provider: 'lancedb' | 'hnswsqlite' | 'memory';
    /** Location where the store keeps its data. */
    storagePath: string;
    /** Vector dimensionality. */
    dimensions: number;
    /** Index structure to use. */
    indexType: 'hnsw' | 'ivf' | 'flat';
    /** Memory usage limit; unit is not specified here. */
    maxMemoryUsage: number;
  };
  /** Embedding settings. */
  embedding: {
    /** Name of the embedding model. */
    model: string;
    /** Embedding provider. */
    provider: 'transformers-js' | 'ollama' | 'local';
    /** Batch size for embedding. */
    batchSize: number;
    /** Whether embeddings are cached. */
    cacheEmbeddings: boolean;
  };
  /** Chunking settings. */
  chunking: {
    /** Chunking strategy. */
    strategy: 'semantic' | 'fixed' | 'adaptive' | 'ast-based';
    /** Maximum chunk size; unit is not specified here. */
    maxChunkSize: number;
    /** Overlap between chunks; unit is not specified here. */
    overlapSize: number;
    /** Whether chunking respects code boundaries. */
    respectCodeBoundaries: boolean;
  };
  /** Indexing settings. */
  indexing: {
    /** Whether indexing is enabled. */
    enabled: boolean;
    /** Paths to watch. */
    watchPaths: string[];
    /** Debounce interval in milliseconds. */
    debounceMs: number;
    /** Batch size for indexing. */
    batchSize: number;
    /** Patterns to exclude; the pattern syntax is not visible in this file. */
    excludePatterns: string[];
  };
  /** Retrieval settings. */
  retrieval: {
    /** Default cap on the number of results. */
    defaultMaxResults: number;
    /** Weighting parameter for hybrid retrieval; range and direction are not visible in this file. */
    hybridAlpha: number;
    /** Whether reranking is enabled. */
    rerankingEnabled: boolean;
    /** Whether context expansion is enabled. */
    contextExpansion: boolean;
  };
}
/**
 * Ambient declaration of the vector-based RAG system.
 *
 * Extends `EventEmitter`. Which events are emitted is not visible in this
 * declaration file. Private members are listed without types, so only the
 * public signatures below describe the usable API.
 */
export declare class VectorRAGSystem extends EventEmitter {
  private logger;
  private config;
  private vectorStore;
  private embeddingModel;
  private codeChunker;
  private modelClient;
  private fileWatcher?;
  private embeddingCache;
  private indexingQueue;
  private isIndexing;
  private performanceMetrics;

  /**
   * Creates the system.
   * @param config - RAG configuration.
   * @param modelClient - Model client for the system; how it is used is not visible in this file.
   */
  constructor(config: RAGConfig, modelClient: UnifiedModelClient);

  /**
   * Initializes the RAG system.
   * @returns A promise that settles when initialization has finished.
   */
  initialize(): Promise<void>;

  /**
   * Runs a query against the RAG system.
   * @param ragQuery - The query to run.
   * @returns A promise for the query result.
   */
  query(ragQuery: RAGQuery): Promise<RAGResult>;

  /**
   * Indexes a single document.
   * @param filePath - Path of the file to index.
   */
  indexDocument(filePath: string): Promise<void>;

  /**
   * Updates an existing document.
   * @param filePath - Path of the file to update.
   */
  updateDocument(filePath: string): Promise<void>;

  /**
   * Returns system statistics.
   * @returns A promise for the current statistics.
   */
  getStats(): Promise<RAGSystemStats>;

  /*
   * Private methods (signatures are omitted from the declaration output).
   */
  private initializeComponents;
  private startFileWatching;
  private performInitialIndexing;
  private indexDirectory;
  private shouldIndexFile;
  private createVectorDocument;
  private generateEmbeddings;
  private semanticSearch;
  private hybridSearch;
  private exactSearch;
  private rerankResults;
  private parseRankings;
  private applyRankings;
  private queueForIndexing;
  private processIndexingQueue;
  private detectLanguage;
  private detectSemanticType;
  private calculateHash;
  private debounce;

  /*
   * Additional public API: lifecycle and maintenance.
   */

  /** Shuts the system down; what is released is not visible in this file. */
  shutdown(): Promise<void>;

  /** Compacts the index. */
  compactIndex(): Promise<void>;

  /** Clears the cache; which cache(s) this covers is not visible in this file. */
  clearCache(): Promise<void>;
}
/**
 * Statistics for the RAG system as a whole, as returned by
 * `VectorRAGSystem.getStats()`.
 */
export interface RAGSystemStats {
  /** Statistics from the vector store. */
  vectorStore: VectorStoreStats;
  /** Query performance statistics. */
  performance: PerformanceStats;
  /** Indexing state. */
  indexing: {
    /** Number of items in the indexing queue. */
    queueSize: number;
    /** Whether indexing is in progress. */
    isIndexing: boolean;
    /** Number of watched paths. */
    watchedPaths: number;
    /** Size of the cache; which cache and what unit are not visible in this file. */
    cacheSize: number;
  };
  /** The configuration in effect. */
  config: RAGConfig;
}
/**
 * Aggregate query performance figures.
 */
export interface PerformanceStats {
  /** Number of queries counted. */
  totalQueries: number;
  /** Average query time; unit is not specified here. */
  averageQueryTime: number;
  /** Average number of results per query. */
  averageResultsPerQuery: number;
  /** Cache hit rate; whether this is a fraction or a percentage is not specified here. */
  cacheHitRate: number;
  /** Numeric value per method name (presumably query counts per retrieval method — TODO confirm). */
  methodBreakdown: Record<string, number>;
}
//# sourceMappingURL=vector-rag-system.d.ts.map