UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

499 lines (429 loc) 18.4 kB
/** * Semantic Code Search Plugin * * Index and search code semantically using @ruvector/wasm vector database. * Enables natural language queries like "find functions that handle authentication". * * Features: * - Index code chunks with embeddings * - Natural language code search (<1ms with HNSW) * - Symbol-aware chunking * - Language-specific tokenization * - Incremental indexing * * @example * ```typescript * import { semanticCodeSearchPlugin } from '@claude-flow/plugins/examples/ruvector-plugins'; * await getDefaultRegistry().register(semanticCodeSearchPlugin); * ``` */ import { PluginBuilder, MCPToolBuilder, HookBuilder, HookEvent, HookPriority, Security, } from '../../src/index.js'; // Import shared vector utilities (consolidated from all plugins) import { IVectorDB, createVectorDB, generateHashEmbedding, } from './shared/vector-utils.js'; // ============================================================================ // Types // ============================================================================ export interface CodeChunk { id: string; filePath: string; content: string; language: string; startLine: number; endLine: number; symbolType?: 'function' | 'class' | 'method' | 'variable' | 'import' | 'type' | 'interface'; symbolName?: string; embedding?: Float32Array; metadata: { size: number; hash: string; indexedAt: Date; dependencies?: string[]; }; } export interface CodeSearchResult { chunk: CodeChunk; similarity: number; relevance: number; highlights?: string[]; } export interface CodeSearchOptions { k?: number; minSimilarity?: number; languages?: string[]; symbolTypes?: CodeChunk['symbolType'][]; filePaths?: string[]; includeContent?: boolean; } // ============================================================================ // Semantic Code Search Core // ============================================================================ export class SemanticCodeSearch { private vectorDb: IVectorDB | null = null; private chunks = new Map<string, CodeChunk>(); private fileIndex = new Map<string, Set<string>>(); private dimensions: number; private nextId = 1; private initPromise: Promise<void> | null = null; constructor(dimensions: number = 768) { this.dimensions = dimensions; } async initialize(): Promise<void> { if (this.vectorDb) return; if (this.initPromise) return this.initPromise; this.initPromise = (async () => { this.vectorDb = await createVectorDB(this.dimensions); })(); return this.initPromise; } private async ensureInitialized(): Promise<IVectorDB> { await this.initialize(); return this.vectorDb!; } /** * Index a code file by chunking and embedding. */ async indexFile(filePath: string, content: string, language: string): Promise<string[]> { const db = await this.ensureInitialized(); const safePath = Security.validateString(filePath, { maxLength: 500 }); const safeContent = Security.validateString(content, { maxLength: 1_000_000 }); const safeLang = Security.validateString(language, { maxLength: 50 }); await this.removeFile(safePath); const chunks = this.chunkCode(safeContent, safeLang, safePath); const chunkIds: string[] = []; for (const chunk of chunks) { const id = `code-${this.nextId++}`; const embedding = this.generateCodeEmbedding(chunk.content, chunk.symbolName, chunk.symbolType); const fullChunk: CodeChunk = { ...chunk, id, embedding }; db.insert(embedding, id, { filePath: chunk.filePath, language: chunk.language, symbolType: chunk.symbolType, symbolName: chunk.symbolName, startLine: chunk.startLine, endLine: chunk.endLine, }); this.chunks.set(id, fullChunk); if (!this.fileIndex.has(safePath)) { this.fileIndex.set(safePath, new Set()); } this.fileIndex.get(safePath)!.add(id); chunkIds.push(id); } return chunkIds; } /** * Search code using natural language query (<1ms with HNSW). */ async search(query: string, options?: CodeSearchOptions): Promise<CodeSearchResult[]> { const db = await this.ensureInitialized(); const safeQuery = Security.validateString(query, { maxLength: 1000 }); const k = options?.k ?? 10; const minSimilarity = options?.minSimilarity ?? 0.3; const queryEmbedding = this.generateQueryEmbedding(safeQuery); const searchResults = db.search(queryEmbedding, k * 2); const results: CodeSearchResult[] = []; for (const result of searchResults) { if (result.score < minSimilarity) continue; const chunk = this.chunks.get(result.id); if (!chunk) continue; if (options?.languages && !options.languages.includes(chunk.language)) continue; if (options?.symbolTypes && chunk.symbolType && !options.symbolTypes.includes(chunk.symbolType)) continue; if (options?.filePaths) { const matchesPath = options.filePaths.some(p => chunk.filePath.includes(p)); if (!matchesPath) continue; } const relevance = this.calculateRelevance(chunk, safeQuery); const highlights = this.findHighlights(chunk.content, safeQuery); results.push({ chunk: options?.includeContent === false ? { ...chunk, content: '' } : chunk, similarity: result.score, relevance, highlights, }); if (results.length >= k) break; } return results.sort((a, b) => (b.similarity * b.relevance) - (a.similarity * a.relevance)); } /** * Find similar code to a given snippet. */ async findSimilar(code: string, k: number = 5): Promise<CodeSearchResult[]> { const db = await this.ensureInitialized(); const safeCode = Security.validateString(code, { maxLength: 10000 }); const embedding = this.generateCodeEmbedding(safeCode); const searchResults = db.search(embedding, k); return searchResults.map(r => { const chunk = this.chunks.get(r.id)!; return { chunk, similarity: r.score, relevance: 1 }; }).filter(r => r.chunk); } /** * Remove all chunks for a file. */ async removeFile(filePath: string): Promise<number> { const db = await this.ensureInitialized(); const chunkIds = this.fileIndex.get(filePath); if (!chunkIds) return 0; let removed = 0; for (const id of chunkIds) { db.delete(id); this.chunks.delete(id); removed++; } this.fileIndex.delete(filePath); return removed; } /** * Get index statistics. */ getStats(): { totalChunks: number; totalFiles: number; byLanguage: Record<string, number>; bySymbolType: Record<string, number>; } { const byLanguage: Record<string, number> = {}; const bySymbolType: Record<string, number> = {}; for (const chunk of this.chunks.values()) { byLanguage[chunk.language] = (byLanguage[chunk.language] ?? 0) + 1; if (chunk.symbolType) { bySymbolType[chunk.symbolType] = (bySymbolType[chunk.symbolType] ?? 0) + 1; } } return { totalChunks: this.chunks.size, totalFiles: this.fileIndex.size, byLanguage, bySymbolType, }; } // ========================================================================= // Private Helpers // ========================================================================= private chunkCode(content: string, language: string, filePath: string): Omit<CodeChunk, 'id' | 'embedding'>[] { const chunks: Omit<CodeChunk, 'id' | 'embedding'>[] = []; const lines = content.split('\n'); const patterns = this.getLanguagePatterns(language); let currentChunk: string[] = []; let chunkStartLine = 1; let currentSymbol: { type: CodeChunk['symbolType']; name: string } | null = null; for (let i = 0; i < lines.length; i++) { const line = lines[i]; const lineNum = i + 1; const symbol = this.detectSymbol(line, patterns); if (symbol && currentChunk.length > 0) { chunks.push(this.createChunk(currentChunk.join('\n'), filePath, language, chunkStartLine, lineNum - 1, currentSymbol)); currentChunk = []; chunkStartLine = lineNum; } if (symbol) currentSymbol = symbol; currentChunk.push(line); if (currentChunk.length >= 50 && !this.isInsideBlock(currentChunk)) { chunks.push(this.createChunk(currentChunk.join('\n'), filePath, language, chunkStartLine, lineNum, currentSymbol)); currentChunk = []; chunkStartLine = lineNum + 1; currentSymbol = null; } } if (currentChunk.length > 0) { chunks.push(this.createChunk(currentChunk.join('\n'), filePath, language, chunkStartLine, lines.length, currentSymbol)); } return chunks; } private createChunk( content: string, filePath: string, language: string, startLine: number, endLine: number, symbol: { type: CodeChunk['symbolType']; name: string } | null ): Omit<CodeChunk, 'id' | 'embedding'> { return { filePath, content, language, startLine, endLine, symbolType: symbol?.type, symbolName: symbol?.name, metadata: { size: content.length, hash: this.hashString(content), indexedAt: new Date() }, }; } private getLanguagePatterns(language: string): RegExp[] { const patterns: Record<string, RegExp[]> = { typescript: [/^\s*(export\s+)?(async\s+)?function\s+(\w+)/, /^\s*(export\s+)?class\s+(\w+)/, /^\s*(export\s+)?interface\s+(\w+)/, /^\s*(export\s+)?type\s+(\w+)/], javascript: [/^\s*(export\s+)?(async\s+)?function\s+(\w+)/, /^\s*(export\s+)?class\s+(\w+)/], python: [/^\s*def\s+(\w+)/, /^\s*class\s+(\w+)/], rust: [/^\s*(pub\s+)?fn\s+(\w+)/, /^\s*(pub\s+)?struct\s+(\w+)/, /^\s*(pub\s+)?enum\s+(\w+)/], go: [/^\s*func\s+(\w+)/, /^\s*type\s+(\w+)\s+struct/], }; return patterns[language] ?? patterns.typescript; } private detectSymbol(line: string, patterns: RegExp[]): { type: CodeChunk['symbolType']; name: string } | null { for (const pattern of patterns) { const match = line.match(pattern); if (match) { const name = match[match.length - 1] || match[2] || match[1]; let type: CodeChunk['symbolType'] = 'function'; if (line.includes('class ')) type = 'class'; else if (line.includes('interface ')) type = 'interface'; else if (line.includes('type ')) type = 'type'; return { type, name }; } } return null; } private isInsideBlock(lines: string[]): boolean { let braceCount = 0; for (const line of lines) { braceCount += (line.match(/{/g) || []).length - (line.match(/}/g) || []).length; } return braceCount > 0; } private generateCodeEmbedding(code: string, symbolName?: string, symbolType?: string): Float32Array { const text = `${symbolType || ''} ${symbolName || ''} ${code}`.toLowerCase(); const embedding = new Float32Array(this.dimensions); let hash = 0; for (let i = 0; i < text.length; i++) { hash = ((hash << 5) - hash) + text.charCodeAt(i); hash = hash & hash; } for (let i = 0; i < this.dimensions; i++) { embedding[i] = Math.sin(hash * (i + 1) * 0.001) * 0.5 + 0.5; } let norm = 0; for (let i = 0; i < this.dimensions; i++) norm += embedding[i] * embedding[i]; norm = Math.sqrt(norm); for (let i = 0; i < this.dimensions; i++) embedding[i] /= norm; return embedding; } private generateQueryEmbedding(query: string): Float32Array { return this.generateCodeEmbedding(query); } private calculateRelevance(chunk: CodeChunk, query: string): number { let relevance = 0.5; const queryLower = query.toLowerCase(); const symbolLower = chunk.symbolName?.toLowerCase() ?? ''; if (symbolLower && queryLower.includes(symbolLower)) relevance += 0.3; if (symbolLower && symbolLower.includes(queryLower.split(' ')[0])) relevance += 0.2; return Math.min(1, relevance); } private findHighlights(content: string, query: string): string[] { const highlights: string[] = []; const keywords = query.toLowerCase().split(/\s+/).filter(k => k.length > 2); const lines = content.split('\n'); for (const line of lines) { for (const keyword of keywords) { if (line.toLowerCase().includes(keyword)) { highlights.push(line.trim()); break; } } if (highlights.length >= 3) break; } return highlights; } private hashString(str: string): string { let hash = 0; for (let i = 0; i < str.length; i++) { hash = ((hash << 5) - hash) + str.charCodeAt(i); hash = hash & hash; } return hash.toString(16); } } // ============================================================================ // Plugin Definition // ============================================================================ let searchInstance: SemanticCodeSearch | null = null; async function getCodeSearch(): Promise<SemanticCodeSearch> { if (!searchInstance) { searchInstance = new SemanticCodeSearch(768); await searchInstance.initialize(); } return searchInstance; } export const semanticCodeSearchPlugin = new PluginBuilder('semantic-code-search', '1.0.0') .withDescription('Semantic code search using @ruvector/wasm HNSW indexing (<1ms search)') .withAuthor('Claude Flow Team') .withTags(['search', 'code', 'semantic', 'ruvector', 'hnsw']) .withMCPTools([ new MCPToolBuilder('code-index') .withDescription('Index a code file for semantic search') .addStringParam('filePath', 'Path to the file', { required: true }) .addStringParam('content', 'File content', { required: true }) .addStringParam('language', 'Programming language', { required: true }) .withHandler(async (params) => { try { const search = await getCodeSearch(); const ids = await search.indexFile(params.filePath as string, params.content as string, params.language as string); return { content: [{ type: 'text', text: `✅ Indexed ${params.filePath}\nCreated ${ids.length} searchable chunks` }] }; } catch (error) { return { content: [{ type: 'text', text: `❌ Error: ${error instanceof Error ? error.message : String(error)}` }], isError: true }; } }) .build(), new MCPToolBuilder('code-search') .withDescription('Search code using natural language (<1ms with HNSW)') .addStringParam('query', 'Natural language search query', { required: true }) .addNumberParam('k', 'Number of results', { default: 5 }) .addNumberParam('minSimilarity', 'Minimum similarity', { default: 0.3 }) .withHandler(async (params) => { try { const search = await getCodeSearch(); const results = await search.search(params.query as string, { k: params.k as number, minSimilarity: params.minSimilarity as number }); if (results.length === 0) return { content: [{ type: 'text', text: '🔍 No matching code found.' }] }; const output = results.map((r, i) => `**${i + 1}. ${r.chunk.filePath}:${r.chunk.startLine}** (${(r.similarity * 100).toFixed(1)}%)\n ${r.chunk.symbolType ? `[${r.chunk.symbolType}] ` : ''}${r.chunk.symbolName || ''}`).join('\n\n'); return { content: [{ type: 'text', text: `🔍 **Found ${results.length} matches:**\n\n${output}` }] }; } catch (error) { return { content: [{ type: 'text', text: `❌ Error: ${error instanceof Error ? error.message : String(error)}` }], isError: true }; } }) .build(), new MCPToolBuilder('code-similar') .withDescription('Find code similar to a given snippet') .addStringParam('code', 'Code snippet', { required: true }) .addNumberParam('k', 'Number of results', { default: 5 }) .withHandler(async (params) => { try { const search = await getCodeSearch(); const results = await search.findSimilar(params.code as string, params.k as number); if (results.length === 0) return { content: [{ type: 'text', text: '🔍 No similar code found.' }] }; const output = results.map((r, i) => `**${i + 1}. ${r.chunk.filePath}:${r.chunk.startLine}** (${(r.similarity * 100).toFixed(1)}% similar)`).join('\n'); return { content: [{ type: 'text', text: `🔍 **Similar code:**\n\n${output}` }] }; } catch (error) { return { content: [{ type: 'text', text: `❌ Error: ${error instanceof Error ? error.message : String(error)}` }], isError: true }; } }) .build(), new MCPToolBuilder('code-stats') .withDescription('Get code index statistics') .withHandler(async () => { const search = await getCodeSearch(); const stats = search.getStats(); return { content: [{ type: 'text', text: `📊 **Code Index:**\n\n**Chunks:** ${stats.totalChunks}\n**Files:** ${stats.totalFiles}\n**Backend:** @ruvector/wasm HNSW` }] }; }) .build(), ]) .withHooks([ new HookBuilder(HookEvent.PostFileWrite) .withName('code-auto-index') .withDescription('Auto-index code files on write') .withPriority(HookPriority.Low) .when((ctx) => { const data = ctx.data as { filePath?: string } | undefined; if (!data?.filePath) return false; const ext = data.filePath.split('.').pop()?.toLowerCase(); return ['ts', 'js', 'tsx', 'jsx', 'py', 'rs', 'go'].includes(ext || ''); }) .handle(async (ctx) => { const data = ctx.data as { filePath: string; content?: string }; if (!data.content) return { success: true }; const langMap: Record<string, string> = { ts: 'typescript', tsx: 'typescript', js: 'javascript', jsx: 'javascript', py: 'python', rs: 'rust', go: 'go' }; const ext = data.filePath.split('.').pop()?.toLowerCase() || ''; try { const search = await getCodeSearch(); await search.indexFile(data.filePath, data.content, langMap[ext] || 'typescript'); } catch { /* silent */ } return { success: true }; }) .build(), ]) .onInitialize(async (ctx) => { ctx.logger.info('Semantic Code Search initializing with @ruvector/wasm...'); await getCodeSearch(); ctx.logger.info('Semantic Code Search ready - HNSW indexing enabled'); }) .build(); export default semanticCodeSearchPlugin;