zrald
Version:
Advanced Graph RAG MCP Server with sophisticated graph structures, operators, and agentic capabilities for AI agents
192 lines • 7.2 kB
JavaScript
/**
 * Mock embedding generator.
 *
 * Produces deterministic pseudo-embeddings derived from a string hash of the
 * input text (no real model is loaded), and bundles static helpers for vector
 * math, text preprocessing/chunking, similarity search and k-means clustering.
 */
export class EmbeddingGenerator {
    modelName;
    dimension;
    /**
     * @param {string} [modelName='mock-embedding-model'] - Label reported by getModelName().
     * @param {number} [dimension=384] - Number of components in each generated embedding.
     */
    constructor(modelName = 'mock-embedding-model', dimension = 384) {
        this.modelName = modelName;
        this.dimension = dimension;
    }
    /**
     * No-op initialisation for the mock model; only logs the model name.
     * @returns {Promise<void>}
     */
    async initialize() {
        console.log(`Mock embedding model initialized: ${this.modelName}`);
    }
    /**
     * Builds a deterministic mock embedding for `text`.
     *
     * Component i is `sin(hash + i) * 0.5 + 0.5` (so it lies in [0, 1]) and the
     * resulting vector is L2-normalised. The same text and dimension always
     * yield the same vector.
     *
     * @param {string} text - Text to embed.
     * @returns {Promise<number[]>} Vector of length `this.dimension`.
     */
    async generateEmbedding(text) {
        const hash = this.simpleHash(text);
        const embedding = [];
        for (let i = 0; i < this.dimension; i++) {
            // Pseudo-random but deterministic: depends only on the hash and index.
            const seed = hash + i;
            embedding.push(Math.sin(seed) * 0.5 + 0.5);
        }
        return EmbeddingGenerator.normalizeVector(embedding);
    }
    /**
     * 32-bit rolling string hash (`hash * 31 + charCode` per UTF-16 code unit).
     *
     * @param {string} str - String to hash.
     * @returns {number} Non-negative integer hash.
     */
    simpleHash(str) {
        let hash = 0;
        for (let i = 0; i < str.length; i++) {
            const char = str.charCodeAt(i);
            hash = ((hash << 5) - hash) + char;
            hash = hash & hash; // Bitwise op truncates to a signed 32-bit integer
        }
        return Math.abs(hash);
    }
    /**
     * Generates embeddings for many texts, 32 at a time.
     *
     * @param {string[]} texts - Texts to embed.
     * @returns {Promise<number[][]>} One embedding per input, in input order.
     */
    async generateBatchEmbeddings(texts) {
        const embeddings = [];
        // Process in batches to bound the number of in-flight promises.
        const batchSize = 32;
        for (let i = 0; i < texts.length; i += batchSize) {
            const batch = texts.slice(i, i + batchSize);
            const batchPromises = batch.map(text => this.generateEmbedding(text));
            const batchEmbeddings = await Promise.all(batchPromises);
            embeddings.push(...batchEmbeddings);
        }
        return embeddings;
    }
    /** @returns {number} Configured embedding dimension. */
    getDimension() {
        return this.dimension;
    }
    /** @returns {string} Configured model name. */
    getModelName() {
        return this.modelName;
    }
    /** @returns {boolean} Always true for the mock implementation. */
    isInitialized() {
        return true; // Always initialized for mock implementation
    }
    // Utility methods for embedding operations
    /**
     * Cosine similarity of two equal-length vectors.
     *
     * @param {number[]} a
     * @param {number[]} b
     * @returns {number} Similarity, or 0 when either vector has zero magnitude
     *   (the cosine is undefined there; 0 avoids propagating NaN into rankings).
     * @throws {Error} If the vectors differ in length.
     */
    static cosineSimilarity(a, b) {
        if (a.length !== b.length) {
            throw new Error('Vectors must have the same dimension');
        }
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denominator = Math.sqrt(normA) * Math.sqrt(normB);
        if (denominator === 0) {
            // Zero-magnitude input: dividing would yield NaN.
            return 0;
        }
        return dotProduct / denominator;
    }
    /**
     * Euclidean (L2) distance between two equal-length vectors.
     *
     * @param {number[]} a
     * @param {number[]} b
     * @returns {number}
     * @throws {Error} If the vectors differ in length.
     */
    static euclideanDistance(a, b) {
        if (a.length !== b.length) {
            throw new Error('Vectors must have the same dimension');
        }
        let sum = 0;
        for (let i = 0; i < a.length; i++) {
            const diff = a[i] - b[i];
            sum += diff * diff;
        }
        return Math.sqrt(sum);
    }
    /**
     * Scales a vector to unit L2 norm.
     *
     * @param {number[]} vector
     * @returns {number[]} A new unit-length vector, or the input array itself
     *   (same reference) when its norm is 0.
     */
    static normalizeVector(vector) {
        const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
        return norm > 0 ? vector.map(val => val / norm) : vector;
    }
    /**
     * Element-wise sum of two equal-length vectors.
     *
     * @param {number[]} a
     * @param {number[]} b
     * @returns {number[]}
     * @throws {Error} If the vectors differ in length.
     */
    static addVectors(a, b) {
        if (a.length !== b.length) {
            throw new Error('Vectors must have the same dimension');
        }
        return a.map((val, i) => val + b[i]);
    }
    /**
     * Element-wise difference `a - b` of two equal-length vectors.
     *
     * @param {number[]} a
     * @param {number[]} b
     * @returns {number[]}
     * @throws {Error} If the vectors differ in length.
     */
    static subtractVectors(a, b) {
        if (a.length !== b.length) {
            throw new Error('Vectors must have the same dimension');
        }
        return a.map((val, i) => val - b[i]);
    }
    /**
     * Multiplies every component of `vector` by `scale`.
     *
     * @param {number[]} vector
     * @param {number} scale
     * @returns {number[]}
     */
    static scaleVector(vector, scale) {
        return vector.map(val => val * scale);
    }
    /**
     * Component-wise arithmetic mean of a non-empty list of vectors.
     *
     * @param {number[][]} vectors
     * @returns {number[]}
     * @throws {Error} If `vectors` is empty or the vectors differ in length.
     */
    static averageVectors(vectors) {
        if (vectors.length === 0) {
            throw new Error('Cannot average empty vector array');
        }
        const dimension = vectors[0].length;
        const sum = new Array(dimension).fill(0);
        for (const vector of vectors) {
            if (vector.length !== dimension) {
                throw new Error('All vectors must have the same dimension');
            }
            for (let i = 0; i < dimension; i++) {
                sum[i] += vector[i];
            }
        }
        return sum.map(val => val / vectors.length);
    }
    // Text preprocessing utilities
    /**
     * Lower-cases text, replaces punctuation/symbols with spaces, collapses
     * whitespace runs and trims.
     *
     * Letters, combining marks and digits of any script are kept (plus `_`);
     * an ASCII-only `\w` class would also strip accented and non-Latin letters.
     *
     * @param {string} text
     * @returns {string}
     */
    static preprocessText(text) {
        return text
            .toLowerCase()
            .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ') // Remove punctuation
            .replace(/\s+/g, ' ') // Normalize whitespace
            .trim();
    }
    /**
     * Splits text into overlapping chunks of space-separated words.
     *
     * Words are obtained by splitting on single spaces only. Consecutive chunks
     * start `maxLength - overlap` words apart; if that step is not positive
     * (`overlap >= maxLength`) it is clamped to 1 so the loop always advances.
     *
     * @param {string} text - Text to chunk.
     * @param {number} [maxLength=512] - Maximum words per chunk; must be > 0.
     * @param {number} [overlap=50] - Words shared between consecutive chunks.
     * @returns {string[]} Chunks in document order.
     * @throws {RangeError} If `maxLength` is zero or negative.
     */
    static chunkText(text, maxLength = 512, overlap = 50) {
        if (maxLength <= 0) {
            throw new RangeError('maxLength must be a positive number');
        }
        const words = text.split(' ');
        const chunks = [];
        // A non-positive step would never advance (infinite loop), so force progress.
        const rawStep = maxLength - overlap;
        const step = rawStep <= 0 ? 1 : rawStep;
        for (let i = 0; i < words.length; i += step) {
            const chunk = words.slice(i, i + maxLength).join(' ');
            chunks.push(chunk);
            // Stop once this chunk has reached the end of the text.
            if (i + maxLength >= words.length) {
                break;
            }
        }
        return chunks;
    }
    // Semantic search utilities
    /**
     * Ranks documents by cosine similarity between their embeddings and the
     * embedding of `query`.
     *
     * @param {string} query - Search text.
     * @param {string[]} documents - Candidate texts.
     * @param {number} [topK=5] - Maximum number of results.
     * @returns {Promise<Array<{text: string, score: number, index: number}>>}
     *   Best matches first; `index` is the position in `documents`.
     */
    async semanticSearch(query, documents, topK = 5) {
        const queryEmbedding = await this.generateEmbedding(query);
        const documentEmbeddings = await this.generateBatchEmbeddings(documents);
        const similarities = documentEmbeddings.map((docEmb, index) => ({
            text: documents[index],
            score: EmbeddingGenerator.cosineSimilarity(queryEmbedding, docEmb),
            index
        }));
        return similarities
            .sort((a, b) => b.score - a.score)
            .slice(0, topK);
    }
    // Clustering utilities
    /**
     * k-means clustering with random, unit-normalised initial centroids.
     *
     * Each iteration assigns every embedding to its nearest centroid (Euclidean
     * distance) and recomputes each non-empty cluster's centroid as the mean of
     * its members. Iteration stops when assignments no longer change or after
     * `maxIterations` passes. Because initialisation uses Math.random(), results
     * are not deterministic and some clusters may end up empty; an empty
     * cluster keeps its previous centroid.
     *
     * @param {number[][]} embeddings - Non-empty list of equal-length vectors.
     * @param {number} k - Number of clusters; positive integer.
     * @param {number} [maxIterations=100] - Upper bound on assignment passes.
     * @returns {{clusters: number[][][], centroids: number[][], assignments: number[]}}
     *   `clusters[j]` holds the embeddings assigned to cluster j and
     *   `assignments[i]` is the cluster index of `embeddings[i]`.
     * @throws {Error} If `embeddings` is empty.
     * @throws {RangeError} If `k` is not a positive integer.
     */
    static kMeansClustering(embeddings, k, maxIterations = 100) {
        if (embeddings.length === 0) {
            throw new Error('Cannot cluster an empty embedding array');
        }
        if (!Number.isInteger(k) || k < 1) {
            throw new RangeError('k must be a positive integer');
        }
        const dimension = embeddings[0].length;
        // Initialize centroids randomly
        const centroids = [];
        for (let i = 0; i < k; i++) {
            const centroid = Array(dimension).fill(0).map(() => Math.random() * 2 - 1);
            centroids.push(EmbeddingGenerator.normalizeVector(centroid));
        }
        let assignments = new Array(embeddings.length).fill(0);
        for (let iteration = 0; iteration < maxIterations; iteration++) {
            // Assign points to nearest centroid
            const newAssignments = embeddings.map(embedding => {
                let bestCluster = 0;
                let bestDistance = Infinity;
                for (let j = 0; j < k; j++) {
                    const distance = EmbeddingGenerator.euclideanDistance(embedding, centroids[j]);
                    if (distance < bestDistance) {
                        bestDistance = distance;
                        bestCluster = j;
                    }
                }
                return bestCluster;
            });
            // Check for convergence. Skipped on the first pass: the initial
            // all-zero assignments are a placeholder, and matching them must not
            // stop the loop before the centroids have been updated at least once.
            const unchanged = iteration > 0 &&
                newAssignments.every((cluster, i) => cluster === assignments[i]);
            if (unchanged) {
                break;
            }
            assignments = newAssignments;
            // Update centroids
            for (let j = 0; j < k; j++) {
                const clusterPoints = embeddings.filter((_, i) => assignments[i] === j);
                if (clusterPoints.length > 0) {
                    centroids[j] = EmbeddingGenerator.averageVectors(clusterPoints);
                }
            }
        }
        // Group embeddings by cluster
        const clusters = Array(k).fill(null).map(() => []);
        embeddings.forEach((embedding, i) => {
            clusters[assignments[i]].push(embedding);
        });
        return { clusters, centroids, assignments };
    }
}
//# sourceMappingURL=embedding-generator.js.map