@opichi/smartcode
Version:
Universal code intelligence MCP server - analyze any codebase with TypeScript excellence and multi-language support
146 lines • 5.72 kB
JavaScript
import { pipeline } from '@xenova/transformers';
/**
 * Turns code nodes and free-text queries into embedding vectors using a
 * feature-extraction pipeline.
 *
 * `initialize()` must complete before any `embed*` method is called; those
 * methods throw `Error('Embedder not initialized')` otherwise.
 *
 * NOTE(review): progress messages are written with `console.log` (stdout).
 * If the host process speaks a protocol over stdout, confirm these messages
 * cannot interfere with it.
 */
export class CodeEmbedder {
    /**
     * Ordered `[fragment, description]` pairs used by `inferFileContext`.
     * The list is scanned top to bottom and the first fragment contained in
     * the lower-cased file name wins, so the order is significant.
     */
    static #FILE_CONTEXTS = Object.freeze([
        ['controller', 'handles HTTP requests and responses'],
        ['model', 'defines data structure and business logic'],
        ['service', 'contains business logic and operations'],
        ['util', 'provides utility functions and helpers'],
        ['helper', 'provides utility functions and helpers'],
        ['config', 'contains configuration settings'],
        ['route', 'defines URL routing and endpoints'],
        ['middleware', 'processes requests before controllers'],
        ['test', 'contains test cases and specifications'],
        ['spec', 'contains test cases and specifications'],
        ['component', 'UI component for user interface'],
        ['page', 'web page or application screen'],
        ['api', 'API endpoint or interface definition'],
    ]);

    /** Loaded pipeline function, or `null` until `initialize()` succeeds. */
    model = null;
    modelName = 'Xenova/all-MiniLM-L6-v2'; // 384-dimensional embeddings

    /**
     * Loads the feature-extraction pipeline for `modelName` and stores it on
     * `this.model`.
     *
     * @returns {Promise<void>}
     * @throws Rethrows whatever the pipeline loader throws, after logging it.
     */
    async initialize() {
        try {
            console.log('Loading embedding model...');
            this.model = await pipeline('feature-extraction', this.modelName);
            console.log('Embedding model loaded successfully');
        }
        catch (error) {
            console.error('Failed to load embedding model:', error);
            throw error;
        }
    }

    /**
     * Embeds a single node.
     *
     * This is best-effort: if building the text or running the model fails,
     * the failure is logged and the node is returned unchanged (without an
     * `embedding` property) instead of rejecting.
     *
     * @param {object} node - Node to embed; `id`, `type`, `name`, `metadata`,
     *   `filePath` and `content` are read.
     * @returns {Promise<object>} A shallow copy of `node` with an `embedding`
     *   array added, or the original `node` on failure.
     * @throws {Error} If the embedder has not been initialized.
     */
    async embedNode(node) {
        if (!this.model) {
            throw new Error('Embedder not initialized');
        }
        try {
            const text = this.prepareTextForEmbedding(node);
            const embedding = await this.generateEmbedding(text);
            return {
                ...node,
                embedding,
            };
        }
        catch (error) {
            console.error(`Failed to embed node ${node.id}:`, error);
            return node; // Return node without embedding rather than failing
        }
    }

    /**
     * Embeds many nodes, preserving input order.
     *
     * Nodes are processed in batches of 10; the nodes inside a batch are
     * embedded concurrently and batches run one after another.
     *
     * @param {object[]} nodes - Nodes to embed.
     * @returns {Promise<object[]>} One result per input node (see `embedNode`).
     * @throws {Error} If the embedder has not been initialized.
     */
    async embedNodes(nodes) {
        if (!this.model) {
            throw new Error('Embedder not initialized');
        }
        const embeddedNodes = [];
        // Process in batches to avoid memory issues
        const batchSize = 10;
        for (let i = 0; i < nodes.length; i += batchSize) {
            const batch = nodes.slice(i, i + batchSize);
            const batchResults = await Promise.all(batch.map((node) => this.embedNode(node)));
            embeddedNodes.push(...batchResults);
            // `i` advances in steps of `batchSize`, so this reports every fifth batch.
            if (i % 50 === 0) {
                console.log(`Embedded ${Math.min(i + batchSize, nodes.length)}/${nodes.length} nodes`);
            }
        }
        return embeddedNodes;
    }

    /**
     * Embeds a free-text query.
     *
     * @param {string} query - Text to embed.
     * @returns {Promise<number[]>} The embedding vector.
     * @throws {Error} If the embedder has not been initialized, or if
     *   embedding fails (the failure is logged and rethrown).
     */
    async embedQuery(query) {
        if (!this.model) {
            throw new Error('Embedder not initialized');
        }
        try {
            return await this.generateEmbedding(query);
        }
        catch (error) {
            console.error('Failed to embed query:', error);
            throw error;
        }
    }

    /**
     * Builds the text that is embedded for a node: "<type> <name>", optional
     * parameter and method lists, an optional description inferred from the
     * file name, and the cleaned leading lines of the node's content, joined
     * with single spaces.
     *
     * Missing `metadata`, `filePath` or `content` are treated as empty, so a
     * sparsely populated node still yields text instead of throwing.
     *
     * @param {object} node - Node to describe.
     * @returns {string} Text to embed.
     */
    prepareTextForEmbedding(node) {
        const metadata = node.metadata ?? {};
        // Add type and name
        const parts = [`${node.type} ${node.name}`];
        // Add metadata context
        if (Array.isArray(metadata.parameters) && metadata.parameters.length > 0) {
            parts.push(`parameters: ${metadata.parameters.join(', ')}`);
        }
        if (Array.isArray(metadata.methods) && metadata.methods.length > 0) {
            parts.push(`methods: ${metadata.methods.join(', ')}`);
        }
        // Add file context. Both separators are accepted so that a
        // Windows-style path yields its last segment rather than the whole
        // path (which would let directory names drive the match).
        const filePath = typeof node.filePath === 'string' ? node.filePath : '';
        const fileName = filePath.split(/[\\/]/).pop() || '';
        const fileContext = this.inferFileContext(fileName);
        if (fileContext) {
            parts.push(fileContext);
        }
        // Add cleaned code content (first few lines). It is appended even
        // when empty, which keeps the output identical to earlier versions.
        parts.push(this.cleanCodeForEmbedding(node.content));
        return parts.join(' ');
    }

    /**
     * Reduces source text to a short snippet: drops blank lines and lines
     * whose trimmed form starts with `//`, `#`, `/*` or `*`, keeps the first
     * five remaining lines (untrimmed), joins them with spaces and truncates
     * to 500 characters.
     *
     * @param {string} content - Raw source text.
     * @returns {string} Snippet, or `''` when `content` is not a string.
     */
    cleanCodeForEmbedding(content) {
        if (typeof content !== 'string') {
            return '';
        }
        const meaningfulLines = content
            .split('\n')
            .filter((line) => {
                const trimmed = line.trim();
                return trimmed.length > 0 &&
                    !trimmed.startsWith('//') &&
                    !trimmed.startsWith('#') &&
                    !trimmed.startsWith('/*') &&
                    !trimmed.startsWith('*');
            })
            .slice(0, 5); // Take first 5 meaningful lines
        return meaningfulLines.join(' ').substring(0, 500); // Limit to 500 chars
    }

    /**
     * Maps a file name to a short role description by case-insensitive
     * substring match against a fixed, ordered list of fragments.
     *
     * @param {string} fileName - File name to inspect.
     * @returns {string} Description of the first matching fragment, or `''`.
     */
    inferFileContext(fileName) {
        const lowerFileName = fileName.toLowerCase();
        for (const [key, context] of CodeEmbedder.#FILE_CONTEXTS) {
            if (lowerFileName.includes(key)) {
                return context;
            }
        }
        return '';
    }

    /**
     * Runs the model on `text` with `{ pooling: 'mean', normalize: true }`
     * and returns the result as a plain array.
     *
     * @param {string} text - Text to embed.
     * @returns {Promise<number[]>} `Array.from(result.data)` when the result
     *   is an object with a `data` property, or the result itself when it is
     *   already an array.
     * @throws {Error} 'Unexpected embedding result format' for any other
     *   result; model errors are logged and rethrown.
     */
    async generateEmbedding(text) {
        try {
            const result = await this.model(text, { pooling: 'mean', normalize: true });
            // Extract the embedding array from the result
            if (result && typeof result === 'object' && 'data' in result) {
                return Array.from(result.data);
            }
            // Fallback for different result formats
            if (Array.isArray(result)) {
                return result;
            }
            throw new Error('Unexpected embedding result format');
        }
        catch (error) {
            console.error('Error generating embedding:', error);
            throw error;
        }
    }
}
//# sourceMappingURL=embedder.js.map