UNPKG

codecrucible-synth

Version:

Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability

226 lines 6.55 kB
/**
 * Vector-Based RAG System for CodeCrucible Synth
 * Production-ready implementation with local-first architecture, LanceDB storage,
 * and real-time incremental indexing optimized for code repositories
 *
 * NOTE(review): this is a declaration file (types only, no runtime code) and
 * appears to be compiler-generated — see the sourceMappingURL trailer. Prefer
 * editing the original .ts source over editing this file by hand.
 */
import { EventEmitter } from 'events';
import { UnifiedModelClient } from '../../refactor/unified-model-client.js';

/**
 * A document held by the RAG system: its text, its metadata, and optionally
 * a document-level embedding and the chunks derived from it.
 */
export interface VectorDocument {
  id: string;
  content: string;
  /** Optional document-level embedding vector. */
  embedding?: number[];
  metadata: DocumentMetadata;
  /** Optional chunks of this document. */
  chunks?: DocumentChunk[];
}

/**
 * Descriptive information attached to a {@link VectorDocument}.
 */
export interface DocumentMetadata {
  filePath: string;
  language: string;
  fileType: string;
  lastModified: Date;
  /** NOTE(review): unit is not stated in the declaration (presumably bytes) — confirm. */
  size: number;
  /** NOTE(review): hash algorithm and input are not visible from the declaration. */
  hash: string;
  repository?: string;
  branch?: string;
  author?: string;
  /** Coarse category of the document. */
  semanticType: 'code' | 'documentation' | 'configuration' | 'test';
  extractedSymbols?: ExtractedSymbol[];
}

/**
 * A named symbol extracted from a document, with the line range it covers.
 */
export interface ExtractedSymbol {
  name: string;
  type: 'function' | 'class' | 'interface' | 'variable' | 'constant';
  /** NOTE(review): whether line numbers are 0- or 1-based is not stated here. */
  startLine: number;
  endLine: number;
  signature?: string;
  docstring?: string;
}

/**
 * A slice of a document. Unlike {@link VectorDocument}, the embedding is
 * required on a chunk.
 */
export interface DocumentChunk {
  id: string;
  content: string;
  embedding: number[];
  /** NOTE(review): offset unit and inclusivity are not stated in the declaration. */
  startOffset: number;
  endOffset: number;
  chunkType: 'function' | 'class' | 'block' | 'comment' | 'documentation';
  /** Presumably the `id` of the owning {@link VectorDocument} — verify against the implementation. */
  parentDocument: string;
  semanticWeight: number;
}

/**
 * Input to {@link VectorRAGSystem.query}. Only `query` and `queryType` are
 * required; every other field is optional.
 */
export interface RAGQuery {
  query: string;
  queryType: 'semantic' | 'exact' | 'hybrid';
  filters?: QueryFilter[];
  maxResults?: number;
  /** NOTE(review): presumably a score cutoff; its direction and range are not visible here. */
  threshold?: number;
  contextWindow?: number;
  includeMetadata?: boolean;
  rerank?: boolean;
}

/**
 * A single field/operator/value condition, used by {@link RAGQuery} and by
 * the {@link VectorStore} search methods.
 */
export interface QueryFilter {
  field: string;
  operator: 'equals' | 'contains' | 'startsWith' | 'in' | 'gt' | 'lt';
  value: string | string[] | number;
}

/**
 * Result of {@link VectorRAGSystem.query}.
 */
export interface RAGResult {
  documents: ScoredDocument[];
  totalFound: number;
  /** NOTE(review): time unit is not stated in the declaration (presumably ms) — confirm. */
  queryTime: number;
  retrievalMethod: string;
  reranked: boolean;
  /** Optional timing and candidate counters. */
  debugInfo?: {
    vectorSearchTime: number;
    rerankTime: number;
    candidatesConsidered: number;
  };
}

/**
 * A document paired with its score and optional explanatory extras.
 */
export interface ScoredDocument {
  document: VectorDocument;
  /** NOTE(review): score scale and ordering are not visible from the declaration. */
  score: number;
  relevanceExplanation?: string;
  highlightedContent?: string;
  matchedChunks?: DocumentChunk[];
}

/**
 * Contract for an embedding backend: describes the model and turns text
 * into numeric vectors, one at a time or in a batch.
 */
export interface EmbeddingModel {
  name: string;
  dimensions: number;
  maxTokens: number;
  /** Resolves to the vector for a single text. */
  embed(text: string): Promise<number[]>;
  /** Resolves to one vector per input text. */
  embedBatch(texts: string[]): Promise<number[][]>;
}

/**
 * Contract for the storage backend. Every method is asynchronous.
 */
export interface VectorStore {
  initialize(): Promise<void>;
  addDocuments(documents: VectorDocument[]): Promise<void>;
  updateDocument(document: VectorDocument): Promise<void>;
  deleteDocument(id: string): Promise<void>;
  /** Search by vector, with optional filters and an optional result cap. */
  search(
    query: number[],
    filters?: QueryFilter[],
    maxResults?: number
  ): Promise<ScoredDocument[]>;
  /** Search using both the query text and its vector, with optional filters. */
  hybridSearch(
    query: string,
    vector: number[],
    filters?: QueryFilter[]
  ): Promise<ScoredDocument[]>;
  /** Resolves to the document, or `null` (presumably when the id is unknown). */
  getDocument(id: string): Promise<VectorDocument | null>;
  getStats(): Promise<VectorStoreStats>;
  compact(): Promise<void>;
  close(): Promise<void>;
}

/**
 * Statistics reported by {@link VectorStore.getStats}.
 * NOTE(review): units for the size and memory fields are not stated here.
 */
export interface VectorStoreStats {
  totalDocuments: number;
  totalChunks: number;
  indexSize: number;
  memoryUsage: number;
  lastUpdated: Date;
  averageDocumentSize: number;
}

/**
 * Contract for splitting documents into chunks, extracting symbols, and
 * deciding from two metadata snapshots whether to re-index.
 */
export interface CodeChunker {
  chunkDocument(document: VectorDocument): Promise<DocumentChunk[]>;
  /** Synchronous, unlike {@link CodeChunker.chunkDocument}. */
  extractSymbols(content: string, language: string): ExtractedSymbol[];
  shouldReindex(
    oldMetadata: DocumentMetadata,
    newMetadata: DocumentMetadata
  ): boolean;
}

/**
 * Configuration accepted by the {@link VectorRAGSystem} constructor.
 * All five sections and all of their fields are required.
 */
export interface RAGConfig {
  /** Storage backend settings. */
  vectorStore: {
    provider: 'lancedb' | 'hnswsqlite' | 'memory';
    storagePath: string;
    dimensions: number;
    indexType: 'hnsw' | 'ivf' | 'flat';
    /** NOTE(review): unit is not stated in the declaration. */
    maxMemoryUsage: number;
  };
  /** Embedding backend settings. */
  embedding: {
    model: string;
    provider: 'transformers-js' | 'ollama' | 'local';
    batchSize: number;
    cacheEmbeddings: boolean;
  };
  /** Chunking settings. */
  chunking: {
    strategy: 'semantic' | 'fixed' | 'adaptive' | 'ast-based';
    /** NOTE(review): unit (characters vs. tokens) is not stated in the declaration. */
    maxChunkSize: number;
    overlapSize: number;
    respectCodeBoundaries: boolean;
  };
  /** Indexing settings. */
  indexing: {
    enabled: boolean;
    watchPaths: string[];
    /** Debounce interval; milliseconds, going by the field name. */
    debounceMs: number;
    batchSize: number;
    excludePatterns: string[];
  };
  /** Retrieval settings. */
  retrieval: {
    defaultMaxResults: number;
    /** NOTE(review): presumably a blend weight for hybrid search — confirm its range. */
    hybridAlpha: number;
    rerankingEnabled: boolean;
    contextExpansion: boolean;
  };
}

/**
 * Entry point of the RAG system. It extends `EventEmitter`.
 * NOTE(review): the event names it emits are not visible in this declaration.
 *
 * Private members are listed by name only; their types are not part of the
 * declared surface.
 */
export declare class VectorRAGSystem extends EventEmitter {
  private logger;
  private config;
  private vectorStore;
  private embeddingModel;
  private codeChunker;
  private modelClient;
  private fileWatcher?;
  private embeddingCache;
  private indexingQueue;
  private isIndexing;
  private performanceMetrics;

  /**
   * @param config - Full system configuration.
   * @param modelClient - Project model client. NOTE(review): its use is not
   *   visible here; the private `rerankResults` member suggests reranking.
   */
  constructor(config: RAGConfig, modelClient: UnifiedModelClient);

  /**
   * Initialize the RAG system
   */
  initialize(): Promise<void>;

  /**
   * Query the RAG system
   *
   * @param ragQuery - The query text, retrieval mode and optional tuning fields.
   * @returns The scored documents plus timing and retrieval information.
   */
  query(ragQuery: RAGQuery): Promise<RAGResult>;

  /**
   * Index a single document
   *
   * @param filePath - Path of the file to index.
   */
  indexDocument(filePath: string): Promise<void>;

  /**
   * Update an existing document
   *
   * @param filePath - Path of the file to update.
   */
  updateDocument(filePath: string): Promise<void>;

  /**
   * Get system statistics
   */
  getStats(): Promise<RAGSystemStats>;

  /**
   * Private Methods
   */
  private initializeComponents;
  private startFileWatching;
  private performInitialIndexing;
  private indexDirectory;
  private shouldIndexFile;
  private createVectorDocument;
  private generateEmbeddings;
  private semanticSearch;
  private hybridSearch;
  private exactSearch;
  private rerankResults;
  private parseRankings;
  private applyRankings;
  private queueForIndexing;
  private processIndexingQueue;
  private detectLanguage;
  private detectSemanticType;
  private calculateHash;
  private debounce;

  /**
   * Public API methods
   */
  shutdown(): Promise<void>;
  compactIndex(): Promise<void>;
  clearCache(): Promise<void>;
}

/**
 * Aggregate statistics returned by {@link VectorRAGSystem.getStats}.
 */
export interface RAGSystemStats {
  vectorStore: VectorStoreStats;
  performance: PerformanceStats;
  /** Indexing state and counters. */
  indexing: {
    queueSize: number;
    isIndexing: boolean;
    /** A count, not the list of paths. */
    watchedPaths: number;
    cacheSize: number;
  };
  config: RAGConfig;
}

/**
 * Query-side performance counters.
 * NOTE(review): the time unit and the scale of `cacheHitRate` are not stated here.
 */
export interface PerformanceStats {
  totalQueries: number;
  averageQueryTime: number;
  averageResultsPerQuery: number;
  cacheHitRate: number;
  /** Numeric value per string key; presumably keyed by retrieval method name. */
  methodBreakdown: Record<string, number>;
}
//# sourceMappingURL=vector-rag-system.d.ts.map