UNPKG

@boundless-oss/atlas

Version:

Atlas - MCP Server for comprehensive startup project management

220 lines (187 loc) 5.98 kB
import crypto from 'crypto';

/** A stored document: its text, its embedding vector and optional caller-supplied metadata. */
export interface Embedding {
  text: string;
  vector: number[];
  metadata?: Record<string, any>;
}

/** A single hit returned by a similarity search. */
export interface SearchResult {
  text: string;
  score: number;
  metadata?: Record<string, any>;
}

/**
 * In-memory store of documents keyed by id, each with a locally computed
 * embedding vector, searchable by cosine similarity.
 */
export class LocalEmbeddings {
  private embeddings: Map<string, Embedding> = new Map();

  /**
   * Generate a deterministic, unit-length embedding vector from text using hashing.
   * This is a simple local alternative to using an API.
   *
   * The text is lower-cased and trimmed first, so inputs that differ only in
   * case or surrounding whitespace produce the same vector.
   *
   * NOTE(review): every component is derived from a SHA-256 digest of the whole
   * normalized string, so texts that differ in any character get effectively
   * unrelated vectors. Scores well above 0 should only be expected for texts
   * that are equal after normalization.
   *
   * @param text - Text to embed.
   * @returns A 128-component vector scaled to unit length.
   */
  generateEmbedding(text: string): number[] {
    const normalized = text.toLowerCase().trim();

    // One hash per dimension; the dimension index is mixed into the input.
    const dimensions = 128;
    const vector: number[] = [];

    for (let i = 0; i < dimensions; i++) {
      const hash = crypto
        .createHash('sha256')
        .update(`${normalized}-${i}`)
        .digest();

      // First two digest bytes as a 16-bit integer, mapped onto [-1, 1].
      const value = (hash[0] + hash[1] * 256) / 65535 * 2 - 1;
      vector.push(value);
    }

    // Scale to unit length.
    const magnitude = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
    return vector.map((val) => val / magnitude);
  }

  /**
   * Add text with its embedding to the store. An existing entry with the same
   * id is replaced.
   *
   * @param id - Key under which the document is stored.
   * @param text - Text to embed and store.
   * @param metadata - Optional data returned alongside search hits.
   */
  addDocument(id: string, text: string, metadata?: Record<string, any>): void {
    const vector = this.generateEmbedding(text);
    this.embeddings.set(id, { text, vector, metadata });
  }

  /**
   * Compute the cosine similarity between two vectors.
   *
   * Both norms are divided out, so vectors that are not unit length (for
   * example ones supplied through `import`) are still scored correctly.
   *
   * @returns A value in [-1, 1]; 0 when the vectors are empty, differ in
   * length, or either has a zero or non-finite norm.
   */
  private cosineSimilarity(a: number[], b: number[]): number {
    if (a.length === 0 || a.length !== b.length) {
      return 0;
    }

    let dotProduct = 0;
    let squaredNormA = 0;
    let squaredNormB = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      squaredNormA += a[i] * a[i];
      squaredNormB += b[i] * b[i];
    }

    const denominator = Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB);
    if (denominator === 0 || !Number.isFinite(denominator)) {
      return 0;
    }
    return dotProduct / denominator;
  }

  /**
   * Search for similar documents using cosine similarity.
   *
   * @param query - Text to compare against every stored document.
   * @param topK - Maximum number of hits to return; values that are not
   * greater than 0 yield an empty result.
   * @param threshold - Minimum score a document needs to be included.
   * @returns Matching documents sorted by score, highest first.
   */
  search(query: string, topK: number = 5, threshold: number = 0.5): SearchResult[] {
    // Also catches NaN. A negative value would otherwise reach slice() and
    // drop hits from the end instead of returning none.
    if (!(topK > 0)) {
      return [];
    }

    const queryVector = this.generateEmbedding(query);
    const results: SearchResult[] = [];

    for (const embedding of this.embeddings.values()) {
      const score = this.cosineSimilarity(queryVector, embedding.vector);
      if (score >= threshold) {
        results.push({
          text: embedding.text,
          score,
          metadata: embedding.metadata,
        });
      }
    }

    // Sort by score descending and return top K
    return results
      .sort((a, b) => b.score - a.score)
      .slice(0, topK);
  }

  /**
   * Clear all embeddings
   */
  clear(): void {
    this.embeddings.clear();
  }

  /**
   * Get total number of embeddings
   */
  size(): number {
    return this.embeddings.size;
  }

  /**
   * Export embeddings for persistence as an array of `[id, embedding]` pairs.
   */
  export(): Array<[string, Embedding]> {
    return Array.from(this.embeddings.entries());
  }

  /**
   * Import embeddings from export. Replaces the current contents of the store.
   *
   * @param data - `[id, embedding]` pairs, as produced by `export`.
   */
  import(data: Array<[string, Embedding]>): void {
    this.embeddings = new Map(data);
  }
}

/**
 * Create a specialized embeddings store for code search
 */
export class CodeEmbeddings extends LocalEmbeddings {
  /**
   * Add a code file, stored under its path, with enhanced metadata.
   *
   * The embedded text is the path, language, extracted features and content
   * joined by spaces; the unmodified content is kept in
   * `metadata.originalContent`.
   *
   * @param filePath - Path of the file; also used as the document id.
   * @param content - Source text of the file.
   * @param language - Language label stored in the metadata and embedded text.
   */
  addCodeFile(filePath: string, content: string, language: string): void {
    const features = this.extractCodeFeatures(content, language);

    const enrichedText = `${filePath} ${language} ${features.join(' ')} ${content}`;

    this.addDocument(filePath, enrichedText, {
      filePath,
      language,
      features,
      originalContent: content,
    });
  }

  /**
   * Extract features from code with regular expressions.
   *
   * @param content - Source text to scan.
   * @param _language - Currently unused; the same patterns are applied to
   * every language.
   * @returns Tags of the form `function:<name>`, `class:<name>`,
   * `import:<path>` and `var:<name>`, in that order.
   */
  private extractCodeFeatures(content: string, _language: string): string[] {
    // Collect the first capture group of every match of a global pattern.
    const capture = (pattern: RegExp): string[] =>
      Array.from(content.matchAll(pattern), (m) => m[1]);

    const features: string[] = [];

    // Function/method names
    for (const name of capture(/(?:function|def|fn|func)\s+(\w+)/g)) {
      features.push(`function:${name}`);
    }

    // Class names
    for (const name of capture(/(?:class|struct|interface)\s+(\w+)/g)) {
      features.push(`class:${name}`);
    }

    // Imports/dependencies. Only matches the keyword directly followed by a
    // quoted path (e.g. `import 'x'`); forms such as `import a from 'x'` or
    // `require('x')` are not captured.
    for (const path of capture(/(?:import|require|use|include)\s+['"]([\w\-\.\/]+)['"]/g)) {
      features.push(`import:${path}`);
    }

    // Variable declarations (limited to avoid noise): skipped entirely when
    // the file has 20 or more of them.
    const varNames = capture(/(?:const|let|var|val)\s+(\w+)\s*=/g);
    if (varNames.length < 20) {
      for (const name of varNames) {
        features.push(`var:${name}`);
      }
    }

    return features;
  }

  /**
   * Search for code with query understanding.
   *
   * @param query - Free-text query; expanded with synonyms before searching.
   * @param topK - Maximum number of hits to return.
   * @returns Hits whose `text` is the original file content when the metadata
   * carries it, otherwise the stored text.
   */
  searchCode(query: string, topK: number = 5): SearchResult[] {
    const enhancedQuery = this.enhanceCodeQuery(query);

    const results = this.search(enhancedQuery, topK, 0.3); // Lower threshold for code

    // Restore original content in results. `??` rather than `||` so that an
    // empty file is returned as '' instead of its enriched text.
    return results.map((result) => ({
      ...result,
      text: result.metadata?.originalContent ?? result.text,
    }));
  }

  /**
   * Enhance search query with code-specific understanding.
   *
   * @param query - Original query.
   * @returns The query followed by the synonyms of every known term it
   * contains (case-insensitive substring match), joined by spaces.
   */
  private enhanceCodeQuery(query: string): string {
    const enhancements: string[] = [query];

    // Common programming synonyms
    const synonyms: Record<string, string[]> = {
      'function': ['method', 'func', 'fn', 'def'],
      'class': ['struct', 'type', 'interface'],
      'variable': ['var', 'const', 'let', 'val'],
      'import': ['require', 'include', 'use'],
    };

    const lowered = query.toLowerCase();
    for (const [term, syns] of Object.entries(synonyms)) {
      if (lowered.includes(term)) {
        enhancements.push(...syns);
      }
    }

    return enhancements.join(' ');
  }
}