/*
 * @boundless-oss/atlas
 * Version:
 * Atlas - MCP Server for comprehensive startup project management
 * 179 lines • 5.93 kB
 * JavaScript
 */
import crypto from 'crypto';
/**
 * In-memory document store that pairs each document id with its text, a
 * deterministic hash-derived vector, and caller-supplied metadata.
 *
 * The vectors are not semantic: every component comes from a SHA-256 digest
 * of the whole normalized text, so two texts share a vector only when they
 * are equal after lower-casing and trimming. Texts that differ in any other
 * way get unrelated vectors.
 */
export class LocalEmbeddings {
    /** Map from document id to `{ text, vector, metadata }`. */
    embeddings = new Map();

    /**
     * Generate a deterministic unit-length vector from text using hashing.
     * This is a simple local alternative to calling an embeddings API.
     *
     * @param {string} text - Text to embed; lower-cased and trimmed first.
     * @returns {number[]} 128 numbers scaled to unit Euclidean length.
     */
    generateEmbedding(text) {
        const normalized = text.toLowerCase().trim();
        const dimensions = 128;
        const vector = [];
        for (let i = 0; i < dimensions; i++) {
            // One digest per dimension: the index suffix makes each hash distinct.
            const hash = crypto
                .createHash('sha256')
                .update(`${normalized}-${i}`)
                .digest();
            // The first two digest bytes, read as a little-endian uint16
            // (0..65535), are rescaled to the range [-1, 1].
            const value = hash.readUInt16LE(0) / 65535 * 2 - 1;
            vector.push(value);
        }
        // Scale to unit length. The magnitude cannot be zero: a component is
        // zero only for the non-integer raw value 32767.5.
        const magnitude = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
        return vector.map(val => val / magnitude);
    }

    /**
     * Add text with its embedding to the store, replacing any existing entry
     * stored under the same id.
     *
     * @param {*} id - Key for the document.
     * @param {string} text - Text to embed and keep.
     * @param {*} [metadata] - Arbitrary data returned with search hits.
     */
    addDocument(id, text, metadata) {
        const vector = this.generateEmbedding(text);
        this.embeddings.set(id, {
            text,
            vector,
            metadata,
        });
    }

    /**
     * Compute the cosine similarity between two vectors.
     *
     * The dot product is divided by both magnitudes, so the result is correct
     * for vectors of any length, not only unit vectors.
     *
     * @param {number[]} a - First vector.
     * @param {number[]} b - Second vector; must have as many components as `a`.
     * @returns {number} Similarity in [-1, 1] (up to rounding); 0 when either
     *   vector has zero magnitude.
     * @throws {RangeError} If the vectors have different lengths.
     */
    cosineSimilarity(a, b) {
        if (a.length !== b.length) {
            throw new RangeError(`Vector dimensions differ: ${a.length} vs ${b.length}`);
        }
        let dotProduct = 0;
        let squaredNormA = 0;
        let squaredNormB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            squaredNormA += a[i] * a[i];
            squaredNormB += b[i] * b[i];
        }
        const denominator = Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB);
        // The cosine is undefined for a zero vector; report "no similarity".
        return denominator === 0 ? 0 : dotProduct / denominator;
    }

    /**
     * Search for similar documents using cosine similarity.
     *
     * @param {string} query - Text to compare against stored documents.
     * @param {number} [topK=5] - Maximum number of hits to return.
     * @param {number} [threshold=0.5] - Minimum score a hit must reach.
     * @returns {{text: string, score: number, metadata: *}[]} Hits sorted by
     *   descending score.
     */
    search(query, topK = 5, threshold = 0.5) {
        const queryVector = this.generateEmbedding(query);
        const results = [];
        for (const embedding of this.embeddings.values()) {
            // import() installs caller-supplied entries without validation, so
            // skip any vector that cannot be compared with the query vector.
            if (embedding.vector?.length !== queryVector.length) {
                continue;
            }
            const score = this.cosineSimilarity(queryVector, embedding.vector);
            if (score >= threshold) {
                results.push({
                    text: embedding.text,
                    score,
                    metadata: embedding.metadata,
                });
            }
        }
        // Sort by score descending and return top K
        return results
            .sort((a, b) => b.score - a.score)
            .slice(0, topK);
    }

    /**
     * Clear all embeddings.
     */
    clear() {
        this.embeddings.clear();
    }

    /**
     * Get the total number of stored embeddings.
     *
     * @returns {number} Count of stored documents.
     */
    size() {
        return this.embeddings.size;
    }

    /**
     * Export embeddings for persistence.
     *
     * @returns {Array} Array of `[id, entry]` pairs. The entry objects are the
     *   stored ones, not copies.
     */
    export() {
        return Array.from(this.embeddings.entries());
    }

    /**
     * Import embeddings from a previous export, replacing the current store.
     *
     * @param {Iterable} data - `[id, entry]` pairs as produced by `export()`.
     */
    import(data) {
        this.embeddings = new Map(data);
    }
}
/**
 * Embeddings store specialised for source files. Each file is stored under
 * its path, with regex-extracted features (function, class, import and
 * variable names) added to both the embedded text and the metadata.
 */
export class CodeEmbeddings extends LocalEmbeddings {
    /**
     * Add a code file with enhanced metadata.
     *
     * The embedded text is the path, language, extracted features and the
     * content joined by spaces; the untouched content is kept in
     * `metadata.originalContent`.
     *
     * @param {string} filePath - Path of the file; also used as document id.
     * @param {string} content - Source text of the file.
     * @param {string} language - Language label stored with the file.
     */
    addCodeFile(filePath, content, language) {
        const features = this.extractCodeFeatures(content, language);
        const enrichedText = `${filePath} ${language} ${features.join(' ')} ${content}`;
        this.addDocument(filePath, enrichedText, {
            filePath,
            language,
            features,
            originalContent: content,
        });
    }

    /**
     * Extract features from code using keyword regexes.
     *
     * Every keyword is anchored with `\b` so it cannot match inside a longer
     * identifier (for example the `def` in `typedef` or the `class` in
     * `subclass`).
     *
     * @param {string} content - Source text to scan.
     * @param {string} language - Currently unused; the same patterns are
     *   applied to every language.
     * @returns {string[]} Features as `function:NAME`, `class:NAME`,
     *   `import:PATH` and `var:NAME`, grouped in that order.
     */
    extractCodeFeatures(content, language) {
        const features = [];
        // Function/method names.
        for (const [, name] of content.matchAll(/\b(?:function|def|fn|func)\s+(\w+)/g)) {
            features.push(`function:${name}`);
        }
        // Class-like type names.
        for (const [, name] of content.matchAll(/\b(?:class|struct|interface)\s+(\w+)/g)) {
            features.push(`class:${name}`);
        }
        // Quoted module specifiers: either a call form (`require('x')`,
        // `import('x')`) or a keyword followed by whitespace (`import 'x'`,
        // `from 'x'`, `use "x"`, `include "x"`). The parenthesised form is
        // limited to import/require so calls such as `Array.from('abc')` are
        // not mistaken for dependencies.
        const importRegex = /\b(?:(?:import|require)\s*\(\s*|(?:import|from|require|use|include)\s+)['"]([\w\-.\/@:]+)['"]/g;
        for (const [, specifier] of content.matchAll(importRegex)) {
            features.push(`import:${specifier}`);
        }
        // Variable declarations are included only when there are fewer than
        // this many, to avoid drowning the other features in noise.
        const variableNoiseLimit = 20;
        const declarations = [...content.matchAll(/\b(?:const|let|var|val)\s+(\w+)\s*=/g)];
        if (declarations.length < variableNoiseLimit) {
            for (const [, name] of declarations) {
                features.push(`var:${name}`);
            }
        }
        return features;
    }

    /**
     * Search for code, expanding the query with programming synonyms first.
     *
     * @param {string} query - Search text.
     * @param {number} [topK=5] - Maximum number of hits to return.
     * @returns {{text: string, score: number, metadata: *}[]} Hits whose
     *   `text` is the original file content when the metadata carries one,
     *   otherwise the stored text.
     */
    searchCode(query, topK = 5) {
        const enhancedQuery = this.enhanceCodeQuery(query);
        const results = this.search(enhancedQuery, topK, 0.3); // Lower threshold for code
        // `??` rather than `||`: an empty file has originalContent === '' and
        // must not fall back to the enriched text.
        return results.map(result => ({
            ...result,
            text: result.metadata?.originalContent ?? result.text,
        }));
    }

    /**
     * Enhance a search query with code-specific synonyms.
     *
     * A synonym group is appended when its key term occurs anywhere in the
     * lower-cased query (substring match).
     *
     * @param {string} query - Original query text.
     * @returns {string} The query followed by any triggered synonyms,
     *   separated by single spaces.
     */
    enhanceCodeQuery(query) {
        const synonyms = {
            'function': ['method', 'func', 'fn', 'def'],
            'class': ['struct', 'type', 'interface'],
            'variable': ['var', 'const', 'let', 'val'],
            'import': ['require', 'include', 'use'],
        };
        const lowered = query.toLowerCase();
        const enhancements = [query];
        for (const [term, syns] of Object.entries(synonyms)) {
            if (lowered.includes(term)) {
                enhancements.push(...syns);
            }
        }
        return enhancements.join(' ');
    }
}
//# sourceMappingURL=embeddings.js.map