lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
78 lines (67 loc) • 1.89 kB
JavaScript
/**
* In-memory LRU cache for query embeddings.
*
* Used by Phase 3.1 (kNN router) and Phase 4.3 (drift detector) to avoid
* repeated embedding calls for queries we've already seen.
*/
const crypto = require('crypto');
const logger = require('../logger');
const DEFAULT_MAX = 5000;
class EmbeddingCache {
constructor(maxSize = DEFAULT_MAX) {
this.maxSize = maxSize;
this.cache = new Map(); // hash -> { embedding, lastAccess }
this.hits = 0;
this.misses = 0;
}
_hash(text) {
return crypto.createHash('sha1').update(text).digest('hex');
}
get(text) {
if (!text || typeof text !== 'string') return null;
const key = this._hash(text);
const entry = this.cache.get(key);
if (!entry) {
this.misses++;
return null;
}
// LRU touch
this.cache.delete(key);
entry.lastAccess = Date.now();
this.cache.set(key, entry);
this.hits++;
return entry.embedding;
}
set(text, embedding) {
if (!text || !embedding) return;
const key = this._hash(text);
if (this.cache.has(key)) this.cache.delete(key);
this.cache.set(key, { embedding, lastAccess: Date.now() });
if (this.cache.size > this.maxSize) {
// Evict least-recently-used (Map keeps insertion/access order)
const oldest = this.cache.keys().next().value;
this.cache.delete(oldest);
}
}
getStats() {
const total = this.hits + this.misses;
return {
size: this.cache.size,
maxSize: this.maxSize,
hits: this.hits,
misses: this.misses,
hitRate: total > 0 ? (this.hits / total).toFixed(3) : '0',
};
}
clear() {
this.cache.clear();
this.hits = 0;
this.misses = 0;
}
}
let _instance = null;
function getEmbeddingCache() {
if (!_instance) _instance = new EmbeddingCache();
return _instance;
}
module.exports = { EmbeddingCache, getEmbeddingCache };