UNPKG

zrald

Version:

Advanced Graph RAG MCP Server with sophisticated graph structures, operators, and agentic capabilities for AI agents

192 lines 7.2 kB
/**
 * Mock embedding generator.
 *
 * Produces deterministic pseudo-embeddings derived from a hash of the input
 * text (no real model is loaded) and bundles vector-math, text-preprocessing,
 * semantic-search and k-means helpers.
 */
export class EmbeddingGenerator {
  modelName;
  dimension;

  /**
   * @param {string} [modelName='mock-embedding-model'] - Label reported by getModelName().
   * @param {number} [dimension=384] - Length of every generated embedding.
   */
  constructor(modelName = 'mock-embedding-model', dimension = 384) {
    this.modelName = modelName;
    this.dimension = dimension;
  }

  /**
   * Logs that the mock model is ready; performs no other work.
   * @returns {Promise<void>}
   */
  async initialize() {
    console.log(`Mock embedding model initialized: ${this.modelName}`);
  }

  /**
   * Builds a deterministic, L2-normalized mock embedding for `text`.
   * The same text always yields the same vector.
   * @param {string} text
   * @returns {Promise<number[]>} Vector of length `this.dimension`.
   */
  async generateEmbedding(text) {
    const hash = this.simpleHash(text);
    const embedding = [];
    for (let i = 0; i < this.dimension; i++) {
      // Pseudo-random but deterministic component in [0, 1].
      const seed = hash + i;
      embedding.push(Math.sin(seed) * 0.5 + 0.5);
    }
    return EmbeddingGenerator.normalizeVector(embedding);
  }

  /**
   * 31-multiplier string hash over UTF-16 code units, truncated to 32 bits.
   * @param {string} str
   * @returns {number} Non-negative integer hash.
   */
  simpleHash(str) {
    let hash = 0;
    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + char;
      hash = hash & hash; // Bitwise op coerces to a 32-bit integer
    }
    return Math.abs(hash);
  }

  /**
   * Embeds every text, resolving 32 at a time, preserving input order.
   * @param {string[]} texts
   * @returns {Promise<number[][]>}
   */
  async generateBatchEmbeddings(texts) {
    const embeddings = [];
    const batchSize = 32;
    for (let i = 0; i < texts.length; i += batchSize) {
      const batch = texts.slice(i, i + batchSize);
      const batchEmbeddings = await Promise.all(
        batch.map((text) => this.generateEmbedding(text)),
      );
      embeddings.push(...batchEmbeddings);
    }
    return embeddings;
  }

  /** @returns {number} Configured embedding length. */
  getDimension() {
    return this.dimension;
  }

  /** @returns {string} Configured model name. */
  getModelName() {
    return this.modelName;
  }

  /** @returns {boolean} Always true: the mock needs no initialization. */
  isInitialized() {
    return true;
  }

  // Utility methods for embedding operations

  /**
   * Cosine similarity of two equal-length vectors.
   * @param {number[]} a
   * @param {number[]} b
   * @returns {number} Similarity in [-1, 1]; 0 when either vector has zero
   *   magnitude (the similarity is undefined there, and NaN would poison
   *   any sort that consumes the score).
   * @throws {Error} If the vectors differ in length.
   */
  static cosineSimilarity(a, b) {
    if (a.length !== b.length) {
      throw new Error('Vectors must have the same dimension');
    }
    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }
    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
    if (denominator === 0) {
      return 0;
    }
    return dotProduct / denominator;
  }

  /**
   * Euclidean (L2) distance between two equal-length vectors.
   * @param {number[]} a
   * @param {number[]} b
   * @returns {number}
   * @throws {Error} If the vectors differ in length.
   */
  static euclideanDistance(a, b) {
    if (a.length !== b.length) {
      throw new Error('Vectors must have the same dimension');
    }
    let sum = 0;
    for (let i = 0; i < a.length; i++) {
      const diff = a[i] - b[i];
      sum += diff * diff;
    }
    return Math.sqrt(sum);
  }

  /**
   * Scales a vector to unit L2 length.
   * @param {number[]} vector
   * @returns {number[]} New normalized array, or the input array itself when
   *   its norm is 0.
   */
  static normalizeVector(vector) {
    const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
    return norm > 0 ? vector.map((val) => val / norm) : vector;
  }

  /**
   * Element-wise sum.
   * @param {number[]} a
   * @param {number[]} b
   * @returns {number[]}
   * @throws {Error} If the vectors differ in length.
   */
  static addVectors(a, b) {
    if (a.length !== b.length) {
      throw new Error('Vectors must have the same dimension');
    }
    return a.map((val, i) => val + b[i]);
  }

  /**
   * Element-wise difference `a - b`.
   * @param {number[]} a
   * @param {number[]} b
   * @returns {number[]}
   * @throws {Error} If the vectors differ in length.
   */
  static subtractVectors(a, b) {
    if (a.length !== b.length) {
      throw new Error('Vectors must have the same dimension');
    }
    return a.map((val, i) => val - b[i]);
  }

  /**
   * Multiplies every component by `scale`.
   * @param {number[]} vector
   * @param {number} scale
   * @returns {number[]}
   */
  static scaleVector(vector, scale) {
    return vector.map((val) => val * scale);
  }

  /**
   * Component-wise mean of a non-empty list of equal-length vectors.
   * @param {number[][]} vectors
   * @returns {number[]}
   * @throws {Error} If the list is empty or the vectors differ in length.
   */
  static averageVectors(vectors) {
    if (vectors.length === 0) {
      throw new Error('Cannot average empty vector array');
    }
    const dimension = vectors[0].length;
    const sum = new Array(dimension).fill(0);
    for (const vector of vectors) {
      if (vector.length !== dimension) {
        throw new Error('All vectors must have the same dimension');
      }
      for (let i = 0; i < dimension; i++) {
        sum[i] += vector[i];
      }
    }
    return sum.map((val) => val / vectors.length);
  }

  // Text preprocessing utilities

  /**
   * Lower-cases text, replaces punctuation with spaces and collapses
   * whitespace runs.
   *
   * Uses Unicode property escapes so letters, combining marks and digits of
   * any script are kept; an ASCII-only `\w` would strip accented and
   * non-Latin letters as if they were punctuation.
   * @param {string} text
   * @returns {string}
   */
  static preprocessText(text) {
    return text
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ') // Remove punctuation
      .replace(/\s+/g, ' ') // Normalize whitespace
      .trim();
  }

  /**
   * Splits text on single spaces into chunks of at most `maxLength` words,
   * with consecutive chunks sharing `overlap` words.
   * @param {string} text
   * @param {number} [maxLength=512] - Maximum words per chunk; must be >= 1.
   * @param {number} [overlap=50] - Words shared between neighbouring chunks.
   * @returns {string[]}
   * @throws {RangeError} If `maxLength` is not a positive number.
   */
  static chunkText(text, maxLength = 512, overlap = 50) {
    if (!(maxLength >= 1)) {
      throw new RangeError('maxLength must be a positive number');
    }
    const words = text.split(' ');
    const chunks = [];
    // Always advance by at least one word: with overlap >= maxLength the
    // raw step would be zero or negative and the loop would never end.
    const step = Math.max(1, maxLength - overlap);
    for (let i = 0; i < words.length; i += step) {
      chunks.push(words.slice(i, i + maxLength).join(' '));
      if (i + maxLength >= words.length) {
        break;
      }
    }
    return chunks;
  }

  // Semantic search utilities

  /**
   * Ranks documents by cosine similarity between their embedding and the
   * query's embedding.
   * @param {string} query
   * @param {string[]} documents
   * @param {number} [topK=5] - Maximum number of results.
   * @returns {Promise<Array<{text: string, score: number, index: number}>>}
   *   Best matches first; `index` is the position in `documents`.
   */
  async semanticSearch(query, documents, topK = 5) {
    const queryEmbedding = await this.generateEmbedding(query);
    const documentEmbeddings = await this.generateBatchEmbeddings(documents);
    const similarities = documentEmbeddings.map((docEmb, index) => ({
      text: documents[index],
      score: EmbeddingGenerator.cosineSimilarity(queryEmbedding, docEmb),
      index,
    }));
    return similarities
      .sort((a, b) => b.score - a.score)
      .slice(0, topK);
  }

  // Clustering utilities

  /**
   * k-means clustering using Euclidean distance and randomly initialized,
   * normalized centroids.
   * @param {number[][]} embeddings - Non-empty list of equal-length vectors.
   * @param {number} k - Number of clusters; positive integer.
   * @param {number} [maxIterations=100]
   * @returns {{clusters: number[][][], centroids: number[][], assignments: number[]}}
   *   `assignments[i]` is the cluster index of `embeddings[i]`. A cluster that
   *   receives no points keeps its previous centroid.
   * @throws {Error} If `embeddings` is empty or `k` is not a positive integer.
   */
  static kMeansClustering(embeddings, k, maxIterations = 100) {
    if (!Array.isArray(embeddings) || embeddings.length === 0) {
      throw new Error('Cannot cluster empty embedding array');
    }
    if (!Number.isInteger(k) || k < 1) {
      throw new Error('k must be a positive integer');
    }
    const dimension = embeddings[0].length;

    // Initialize centroids randomly
    const centroids = [];
    for (let i = 0; i < k; i++) {
      const centroid = Array.from({ length: dimension }, () => Math.random() * 2 - 1);
      centroids.push(EmbeddingGenerator.normalizeVector(centroid));
    }

    let assignments = new Array(embeddings.length).fill(0);
    for (let iteration = 0; iteration < maxIterations; iteration++) {
      // Assign points to nearest centroid
      const newAssignments = embeddings.map((embedding) => {
        let bestCluster = 0;
        let bestDistance = Infinity;
        for (let j = 0; j < k; j++) {
          const distance = EmbeddingGenerator.euclideanDistance(embedding, centroids[j]);
          if (distance < bestDistance) {
            bestDistance = distance;
            bestCluster = j;
          }
        }
        return bestCluster;
      });

      // Convergence is only meaningful from the second pass on: the initial
      // all-zero assignments are a placeholder, and matching them must not
      // skip the first centroid update.
      const unchanged = newAssignments.every((cluster, i) => cluster === assignments[i]);
      if (iteration > 0 && unchanged) {
        break;
      }
      assignments = newAssignments;

      // Update centroids
      for (let j = 0; j < k; j++) {
        const clusterPoints = embeddings.filter((_, i) => assignments[i] === j);
        if (clusterPoints.length > 0) {
          centroids[j] = EmbeddingGenerator.averageVectors(clusterPoints);
        }
      }
    }

    // Group embeddings by cluster
    const clusters = Array.from({ length: k }, () => []);
    embeddings.forEach((embedding, i) => {
      clusters[assignments[i]].push(embedding);
    });
    return { clusters, centroids, assignments };
  }
}
//# sourceMappingURL=embedding-generator.js.map