UNPKG

@opichi/smartcode

Version:

Universal code intelligence MCP server - analyze any codebase with TypeScript excellence and multi-language support

146 lines 5.72 kB
import { pipeline } from '@xenova/transformers';

/** Number of nodes embedded concurrently per batch when the caller gives none. */
const DEFAULT_BATCH_SIZE = 10;

/** A progress line is written once every this many batches. */
const PROGRESS_LOG_EVERY_BATCHES = 5;

/** Limits applied to node source code before it is embedded. */
const MAX_CONTENT_LINES = 5;
const MAX_CONTENT_CHARS = 500;

/**
 * Ordered [keyword, description] pairs used by `inferFileContext`.
 * The first keyword contained in the lower-cased file name wins, so the
 * order of this table is significant.
 */
const FILE_CONTEXTS = Object.freeze([
  ['controller', 'handles HTTP requests and responses'],
  ['model', 'defines data structure and business logic'],
  ['service', 'contains business logic and operations'],
  ['util', 'provides utility functions and helpers'],
  ['helper', 'provides utility functions and helpers'],
  ['config', 'contains configuration settings'],
  ['route', 'defines URL routing and endpoints'],
  ['middleware', 'processes requests before controllers'],
  ['test', 'contains test cases and specifications'],
  ['spec', 'contains test cases and specifications'],
  ['component', 'UI component for user interface'],
  ['page', 'web page or application screen'],
  ['api', 'API endpoint or interface definition'],
]);

/**
 * Throws when the embedder's model has not been loaded yet.
 * Kept as a module function (not a `#private` method) so the class methods
 * keep working on any object that carries a `model` property.
 *
 * @param {{ model: unknown }} embedder
 * @throws {Error} 'Embedder not initialized' when `embedder.model` is falsy.
 */
function assertInitialized(embedder) {
  if (!embedder.model) {
    throw new Error('Embedder not initialized');
  }
}

/**
 * Turns code nodes and free-text queries into embedding vectors using a
 * `feature-extraction` pipeline from `@xenova/transformers`.
 *
 * NOTE(review): informational messages are written with `console.error`
 * (stderr). The package is described as an MCP server; stdio MCP transports
 * reserve stdout for protocol messages — confirm the transport in use.
 */
export class CodeEmbedder {
  /** Loaded pipeline function, or `null` until `initialize()` succeeds. */
  model = null;
  modelName = 'Xenova/all-MiniLM-L6-v2'; // 384-dimensional embeddings

  /**
   * Loads the embedding pipeline named by `modelName` into `this.model`.
   *
   * @returns {Promise<void>}
   * @throws Rethrows whatever `pipeline()` rejects with, after logging it.
   */
  async initialize() {
    try {
      console.error('Loading embedding model...');
      this.model = await pipeline('feature-extraction', this.modelName);
      console.error('Embedding model loaded successfully');
    } catch (error) {
      console.error('Failed to load embedding model:', error);
      throw error;
    }
  }

  /**
   * Embeds a single node. Best effort: if building the text or running the
   * model fails, the failure is logged and the node is returned unchanged
   * (without an `embedding` property).
   *
   * @param {object} node - Node to embed; `id`, `type`, `name`, `metadata`,
   *   `filePath` and `content` are read.
   * @returns {Promise<object>} A shallow copy of `node` with `embedding`
   *   added, or the original `node` on failure.
   * @throws {Error} When the embedder has not been initialized.
   */
  async embedNode(node) {
    assertInitialized(this);
    try {
      const text = this.prepareTextForEmbedding(node);
      const embedding = await this.generateEmbedding(text);
      return { ...node, embedding };
    } catch (error) {
      // `node?.id` so that a null/undefined node cannot throw from inside
      // the handler and defeat the best-effort contract.
      console.error(`Failed to embed node ${node?.id}:`, error);
      return node; // Return node without embedding rather than failing
    }
  }

  /**
   * Embeds many nodes, `batchSize` at a time, preserving input order.
   * Nodes inside one batch are embedded concurrently; batches run one after
   * another to bound memory use.
   *
   * @param {object[]} nodes - Nodes to embed.
   * @param {number} [batchSize=10] - Positive integer batch size.
   * @returns {Promise<object[]>} One result per input node (see `embedNode`).
   * @throws {Error} When the embedder has not been initialized.
   * @throws {RangeError} When `batchSize` is not a positive integer.
   */
  async embedNodes(nodes, batchSize = DEFAULT_BATCH_SIZE) {
    assertInitialized(this);
    // A batch size of 0 (or NaN) would never advance the loop below.
    if (!Number.isInteger(batchSize) || batchSize < 1) {
      throw new RangeError(`batchSize must be a positive integer, got ${batchSize}`);
    }

    const embeddedNodes = [];
    for (let start = 0; start < nodes.length; start += batchSize) {
      const batch = nodes.slice(start, start + batchSize);
      const batchResults = await Promise.all(batch.map((node) => this.embedNode(node)));
      embeddedNodes.push(...batchResults);

      // Every 5th batch, starting with the first; for the default batch
      // size of 10 this is the same cadence as `start % 50 === 0`.
      if ((start / batchSize) % PROGRESS_LOG_EVERY_BATCHES === 0) {
        console.error(`Embedded ${Math.min(start + batchSize, nodes.length)}/${nodes.length} nodes`);
      }
    }
    return embeddedNodes;
  }

  /**
   * Embeds a free-text query.
   *
   * @param {string} query
   * @returns {Promise<number[]>} The embedding vector.
   * @throws {Error} When the embedder has not been initialized, or when
   *   embedding fails (the original error is logged and rethrown).
   */
  async embedQuery(query) {
    assertInitialized(this);
    try {
      return await this.generateEmbedding(query);
    } catch (error) {
      console.error('Failed to embed query:', error);
      throw error;
    }
  }

  /**
   * Builds the text that represents a node for embedding: type and name,
   * parameter and method lists when present, a context phrase inferred from
   * the file name, and a cleaned excerpt of the node's code, joined by
   * single spaces.
   *
   * Missing `metadata`, `filePath` or `content` are treated as empty rather
   * than throwing, so a partially populated node still gets an embedding.
   *
   * @param {object} node
   * @returns {string}
   */
  prepareTextForEmbedding(node) {
    const metadata = node.metadata ?? {};
    const parts = [`${node.type} ${node.name}`];

    if (metadata.parameters && metadata.parameters.length > 0) {
      parts.push(`parameters: ${metadata.parameters.join(', ')}`);
    }
    if (metadata.methods && metadata.methods.length > 0) {
      parts.push(`methods: ${metadata.methods.join(', ')}`);
    }

    // Accept both '/' and '\' separators so that only the final path
    // segment (the file name) is matched, whatever platform produced it.
    const fileName = String(node.filePath ?? '').split(/[\\/]/).pop() || '';
    const fileContext = this.inferFileContext(fileName);
    if (fileContext) {
      parts.push(fileContext);
    }

    parts.push(this.cleanCodeForEmbedding(node.content));
    return parts.join(' ');
  }

  /**
   * Reduces source code to a short excerpt: drops blank lines and lines
   * whose trimmed text starts with `//`, `#`, `/*` or `*`, keeps the first
   * 5 remaining lines, joins them with spaces and truncates to 500
   * characters.
   *
   * @param {string} content - Source code; any non-string yields ''.
   * @returns {string}
   */
  cleanCodeForEmbedding(content) {
    if (typeof content !== 'string') {
      return '';
    }
    const meaningfulLines = content
      .split('\n')
      .filter((line) => {
        const trimmed = line.trim();
        return trimmed.length > 0 &&
          !trimmed.startsWith('//') &&
          !trimmed.startsWith('#') &&
          !trimmed.startsWith('/*') &&
          !trimmed.startsWith('*');
      })
      .slice(0, MAX_CONTENT_LINES);
    return meaningfulLines.join(' ').substring(0, MAX_CONTENT_CHARS);
  }

  /**
   * Returns a short description for a file based on keywords contained in
   * its name (case-insensitive substring match, first table entry wins).
   *
   * @param {string} fileName
   * @returns {string} The description, or '' when no keyword matches.
   */
  inferFileContext(fileName) {
    const lowerFileName = fileName.toLowerCase();
    const match = FILE_CONTEXTS.find(([keyword]) => lowerFileName.includes(keyword));
    return match ? match[1] : '';
  }

  /**
   * Runs the model on `text` and returns the embedding as a plain array.
   * The pipeline is called with `{ pooling: 'mean', normalize: true }`.
   *
   * @param {string} text
   * @returns {Promise<number[]>}
   * @throws {Error} When the embedder has not been initialized, when the
   *   model call fails, or when the result is neither an object with a
   *   `data` property nor an array.
   */
  async generateEmbedding(text) {
    // Fail with the same explicit message as the public entry points rather
    // than an opaque "this.model is not a function" TypeError.
    assertInitialized(this);
    try {
      const result = await this.model(text, { pooling: 'mean', normalize: true });

      // Extract the embedding array from the result
      if (result && typeof result === 'object' && 'data' in result) {
        return Array.from(result.data);
      }
      // Fallback for different result formats
      if (Array.isArray(result)) {
        return result;
      }
      throw new Error('Unexpected embedding result format');
    } catch (error) {
      console.error('Error generating embedding:', error);
      throw error;
    }
  }
}
//# sourceMappingURL=embedder.js.map