ultimate-mcp-server
The definitive all-in-one Model Context Protocol server for AI-assisted coding across 30+ platforms
JavaScript
/**
* Prompt Caching for LLM Providers
* Implements caching strategies to reduce token usage and costs
*/
import { createHash } from 'crypto';
import { Logger } from '../../utils/logger.js';
export class PromptCache {
cache = new Map();
logger;
options;
stats = {
hits: 0,
misses: 0,
tokensSaved: 0,
costSaved: 0 // reserved; not updated by the current implementation
};
constructor(options = {}) {
this.logger = new Logger('PromptCache');
this.options = {
maxSize: options.maxSize || 1000,
ttl: options.ttl || 3600000, // 1 hour default
enableSimilarity: options.enableSimilarity || false,
threshold: options.threshold || 0.95
};
}
/**
* Generate cache key from messages
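* e.g. sha256("gpt-4:system:<text>|user:<text>") → 64-char hex digest ("gpt-4" illustrative)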
*/
generateKey(messages, model) {
const content = messages.map(m => `${m.role}:${m.content}`).join('|');
return createHash('sha256')
.update(`${model}:${content}`)
.digest('hex');
}
/**
* Check if cache entry is still valid
*/
isValid(entry) {
return Date.now() - entry.timestamp < this.options.ttl;
}
/**
* Get cached response if available
*/
get(messages, model, provider) {
const key = this.generateKey(messages, model);
const entry = this.cache.get(key);
if (entry && this.isValid(entry)) {
entry.hits++;
this.stats.hits++;
this.stats.tokensSaved += entry.tokenCount;
this.logger.debug(`Cache hit for ${model} (${entry.hits} hits, ${entry.tokenCount} tokens saved)`);
return entry.response;
}
// Try similarity matching if enabled
if (this.options.enableSimilarity) {
const similar = this.findSimilar(messages, model, provider);
if (similar) {
similar.hits++;
this.stats.hits++;
this.stats.tokensSaved += similar.tokenCount;
this.logger.debug(`Similar cache hit for ${model} (${similar.tokenCount} tokens saved)`);
return similar.response;
}
}
this.stats.misses++;
return null;
}
/**
* Store response in cache
*/
set(messages, response, model, provider, tokenCount) {
const key = this.generateKey(messages, model);
// Enforce max size (skip eviction when overwriting an existing key)
if (!this.cache.has(key) && this.cache.size >= this.options.maxSize) {
this.evictOldest();
}
const entry = {
key,
response,
messages,
timestamp: Date.now(),
tokenCount,
model,
provider,
hits: 0
};
this.cache.set(key, entry);
this.logger.debug(`Cached response for ${model} (${tokenCount} tokens)`);
}
/**
* Find semantically similar cached entry
*/
findSimilar(messages, model, provider) {
// Simple implementation - can be enhanced with embeddings
const targetLength = messages.length;
for (const entry of this.cache.values()) {
if (entry.model !== model || entry.provider !== provider)
continue;
if (!this.isValid(entry))
continue;
// Check if message count is similar
if (Math.abs(entry.messages.length - targetLength) > 1)
continue;
// Check last message similarity (simple approach)
const lastTarget = messages[messages.length - 1];
const lastCached = entry.messages[entry.messages.length - 1];
if (lastTarget.role === lastCached.role) {
const similarity = this.calculateSimilarity(lastTarget.content, lastCached.content);
if (similarity >= this.options.threshold) {
return entry;
}
}
}
return null;
}
/**
* Calculate simple similarity between two strings
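* e.g. "fix the bug" vs "fix the bugs": 2 shared of 4 unique tokens → 0.5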
*/
calculateSimilarity(a, b) {
// Simple Jaccard similarity - can be replaced with better algorithms
const setA = new Set(a.toLowerCase().split(/\s+/));
const setB = new Set(b.toLowerCase().split(/\s+/));
const intersection = new Set([...setA].filter(x => setB.has(x)));
const union = new Set([...setA, ...setB]);
return intersection.size / union.size;
}
/**
* Evict oldest entries
*/
evictOldest() {
let oldest = null;
let oldestKey = null;
for (const [key, entry] of this.cache) {
if (!oldest || entry.timestamp < oldest.timestamp) {
oldest = entry;
oldestKey = key;
}
}
if (oldestKey) {
this.cache.delete(oldestKey);
this.logger.debug(`Evicted cache entry: ${oldestKey}`);
}
}
/**
* Clear expired entries
*/
clearExpired() {
const now = Date.now();
let cleared = 0;
for (const [key, entry] of this.cache) {
if (now - entry.timestamp > this.options.ttl) {
this.cache.delete(key);
cleared++;
}
}
if (cleared > 0) {
this.logger.debug(`Cleared ${cleared} expired cache entries`);
}
}
/**
* Get cache statistics
*/
getStats() {
const totalEntries = this.cache.size;
const avgHits = totalEntries > 0
? Array.from(this.cache.values()).reduce((sum, e) => sum + e.hits, 0) / totalEntries
: 0;
return {
...this.stats,
totalEntries,
avgHits,
hitRate: this.stats.hits / (this.stats.hits + this.stats.misses) || 0,
memorySizeEstimate: this.estimateMemorySize()
};
}
/**
* Estimate memory usage
*/
estimateMemorySize() {
let size = 0;
for (const entry of this.cache.values()) {
size += JSON.stringify(entry).length * 2; // Rough estimate (2 bytes per char)
}
return size;
}
/**
* Clear all cache
*/
clear() {
this.cache.clear();
this.logger.info('Cache cleared');
}
/**
* Export cache for persistence
*/
export() {
return JSON.stringify({
entries: Array.from(this.cache.entries()),
stats: this.stats
});
}
/**
* Import cache from persistence
*/
import(data) {
try {
const parsed = JSON.parse(data);
this.cache = new Map(parsed.entries);
this.stats = parsed.stats;
this.clearExpired();
this.logger.info(`Imported ${this.cache.size} cache entries`);
}
catch (error) {
this.logger.error('Failed to import cache:', error);
}
}
}
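/**
* Example usage (illustrative sketch, inside an async function; the model
* name, provider label and token count are assumptions, and callProvider
* is a hypothetical LLM call, not part of this module):
*
*   const cache = new PromptCache({ maxSize: 500, enableSimilarity: true });
*   const messages = [
*     { role: 'system', content: 'You are a helpful assistant.' },
*     { role: 'user', content: 'Summarize this file.' }
*   ];
*   let response = cache.get(messages, 'claude-sonnet', 'anthropic');
*   if (!response) {
*     response = await callProvider(messages);
*     cache.set(messages, response, 'claude-sonnet', 'anthropic', 1200);
*   }
*/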
/**
* Anthropic-specific prompt caching
* Implements Anthropic's beta caching feature
*/
export class AnthropicPromptCache {
logger;
cacheBreakpoints = new Map();
constructor() {
this.logger = new Logger('AnthropicPromptCache');
}
/**
* Prepare messages with cache control for Anthropic API
*/
prepareCachedMessages(messages) {
const result = [];
// Mark system messages and long initial context for caching
for (let i = 0; i < messages.length; i++) {
const msg = messages[i];
const formatted = {
// Anthropic has no 'system' message role; this module folds system
// prompts into the conversation (the API also accepts a top-level
// `system` parameter)
role: msg.role === 'system' ? 'assistant' : msg.role,
content: msg.content
};
// Add cache control to system messages and long early context.
// The Anthropic Messages API expects cache_control on a content
// block, not on the message object itself.
if (msg.role === 'system' ||
(i < 3 && msg.content.length > 1000)) {
formatted.content = [
{ type: 'text', text: msg.content, cache_control: { type: 'ephemeral' } }
];
}
result.push(formatted);
}
return result;
}
/**
* Calculate potential token savings from caching
*/
estimateSavings(messages, cachedTokens) {
// At the time of writing, Anthropic bills cache reads at ~10% of the
// base input-token rate (cache writes carry a 25% premium), so reads
// save roughly 90%
const savings = cachedTokens * 0.9;
this.logger.debug(`Estimated savings: ${savings} tokens (${cachedTokens} cached)`);
return savings;
}
}
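/**
* Example usage (illustrative sketch; longSystemPrompt is a hypothetical
* variable):
*
*   const anthropicCache = new AnthropicPromptCache();
*   const prepared = anthropicCache.prepareCachedMessages([
*     { role: 'system', content: longSystemPrompt },
*     { role: 'user', content: 'Refactor this function.' }
*   ]);
*   // Pass `prepared` as the messages array of an Anthropic Messages API
*   // request; blocks tagged cache_control: ephemeral become cache
*   // breakpoints.
*   const saved = anthropicCache.estimateSavings(prepared, 2048);
*/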
//# sourceMappingURL=prompt-cache.js.map