@boundless-oss/atlas
Version:
Atlas - MCP Server for comprehensive startup project management
111 lines • 3.62 kB
JavaScript
import { promises as fs } from 'fs';
import path from 'path';
import crypto from 'crypto';
/**
 * Embedding provider with an on-disk JSON cache.
 *
 * Vectors are currently produced by a deterministic mock derived from a
 * SHA-256 digest of the input text (see {@link LocalEmbeddingModel#generateEmbedding});
 * no real model is loaded. Each embedded text is cached as one JSON file
 * inside `cachePath`.
 */
export class LocalEmbeddingModel {
    /** Model identifier; it is mixed into every cache key. */
    modelName;
    /** Number of components in each generated embedding. */
    dimension;
    /** Directory that holds one `<cacheKey>.json` file per cached text. */
    cachePath;
    /** True once initialize() has made sure the cache directory exists. */
    initialized = false;

    /**
     * @param {object} [config]
     * @param {string} [config.modelName] - Defaults to 'all-MiniLM-L6-v2'.
     * @param {number} [config.dimension] - Defaults to 384.
     * @param {string} [config.cachePath] - Defaults to '.atlas/rag/embeddings-cache'.
     */
    constructor(config = {}) {
        // `||` means any falsy value (undefined, '', 0) falls back to the default.
        this.modelName = config.modelName || 'all-MiniLM-L6-v2';
        this.dimension = config.dimension || 384;
        this.cachePath = config.cachePath || '.atlas/rag/embeddings-cache';
    }

    /**
     * Ensures the cache directory exists. Subsequent calls are no-ops.
     *
     * @returns {Promise<void>}
     */
    async initialize() {
        if (this.initialized) {
            return;
        }
        try {
            await fs.access(this.cachePath);
        }
        catch {
            // Path is not accessible: create it, including missing parents.
            await fs.mkdir(this.cachePath, { recursive: true });
        }
        this.initialized = true;
    }

    /**
     * Embeds several texts, one at a time and in input order.
     *
     * @param {string[]} texts
     * @returns {Promise<Float32Array[]>} One vector per text; `[]` when
     *   `texts` is falsy or empty.
     */
    async embed(texts) {
        if (!texts || texts.length === 0) {
            return [];
        }
        const embeddings = [];
        for (const text of texts) {
            embeddings.push(await this.embedSingle(text));
        }
        return embeddings;
    }

    /**
     * Returns the embedding for one text, using the disk cache when it holds
     * a usable entry and (re)writing the entry otherwise.
     *
     * @param {string} text
     * @returns {Promise<Float32Array>} Vector of length `this.dimension`.
     */
    async embedSingle(text) {
        await this.initialize();
        const cacheKey = this.getCacheKey(text);
        const cacheFile = path.join(this.cachePath, `${cacheKey}.json`);
        try {
            const data = JSON.parse(await fs.readFile(cacheFile, 'utf-8'));
            // The cache key does not encode the dimension, and the file may
            // hold any parseable JSON. Only trust an array of exactly
            // `dimension` finite numbers; anything else would silently become
            // a wrong-length or zero-filled Float32Array.
            const isUsable = Array.isArray(data)
                && data.length === this.dimension
                && data.every((value) => Number.isFinite(value));
            if (isUsable) {
                return new Float32Array(data);
            }
            // Stale or malformed entry: fall through and overwrite it below.
        }
        catch {
            // Missing, unreadable or unparseable entry: treat as a cache miss.
        }
        const embedding = await this.generateEmbedding(text);
        try {
            await fs.writeFile(cacheFile, JSON.stringify(Array.from(embedding)));
        }
        catch {
            // Caching is best-effort; a failed write must not fail the call.
        }
        return embedding;
    }

    /**
     * Cosine similarity of two equal-length numeric vectors.
     *
     * @param {ArrayLike<number>} a
     * @param {ArrayLike<number>} b
     * @returns {number} `dot(a, b) / (|a| * |b|)`, or 0 when either vector
     *   has zero norm.
     * @throws {Error} When the vectors differ in length.
     */
    cosineSimilarity(a, b) {
        if (a.length !== b.length) {
            throw new Error('Vectors must have the same dimension');
        }
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        normA = Math.sqrt(normA);
        normB = Math.sqrt(normB);
        if (normA === 0 || normB === 0) {
            return 0;
        }
        return dotProduct / (normA * normB);
    }

    /**
     * Produces a deterministic mock embedding for `text`.
     *
     * Placeholder for a real local model: component `i` is byte
     * `i % digest.length` of the SHA-256 digest of `text`, mapped from
     * [0, 255] to [-1, 1]; the vector is then scaled to unit length.
     *
     * @param {string} text
     * @returns {Promise<Float32Array>} Vector of length `this.dimension`.
     */
    async generateEmbedding(text) {
        const embedding = new Float32Array(this.dimension);
        const hash = crypto.createHash('sha256').update(text).digest();
        for (let i = 0; i < this.dimension; i++) {
            // The digest is shorter than the default dimension, so bytes are
            // reused cyclically.
            const byte = hash[i % hash.length];
            embedding[i] = (byte / 127.5) - 1;
        }
        let norm = 0;
        for (let i = 0; i < embedding.length; i++) {
            norm += embedding[i] * embedding[i];
        }
        norm = Math.sqrt(norm);
        // Guard against dividing by zero for an all-zero (or empty) vector.
        if (norm > 0) {
            for (let i = 0; i < embedding.length; i++) {
                embedding[i] /= norm;
            }
        }
        return embedding;
    }

    /**
     * Cache file stem for `text`: hex SHA-256 of `"<modelName>:<text>"`.
     *
     * @param {string} text
     * @returns {string}
     */
    getCacheKey(text) {
        return crypto
            .createHash('sha256')
            .update(`${this.modelName}:${text}`)
            .digest('hex');
    }
}
//# sourceMappingURL=embeddings.js.map