ultimate-mcp-server

The definitive all-in-one Model Context Protocol server for AI-assisted coding across 30+ platforms

/**
 * Prompt Caching for LLM Providers
 * Implements caching strategies to reduce token usage and costs
 */
import { createHash } from 'crypto';
import { Logger } from '../../utils/logger.js';

export class PromptCache {
    cache = new Map();
    logger;
    options;
    stats = { hits: 0, misses: 0, tokensSaved: 0, costSaved: 0 };

    constructor(options = {}) {
        this.logger = new Logger('PromptCache');
        this.options = {
            maxSize: options.maxSize || 1000,
            ttl: options.ttl || 3600000, // 1 hour default
            enableSimilarity: options.enableSimilarity || false,
            threshold: options.threshold || 0.95
        };
    }

    /**
     * Generate cache key from messages
     */
    generateKey(messages, model) {
        const content = messages.map(m => `${m.role}:${m.content}`).join('|');
        return createHash('sha256')
            .update(`${model}:${content}`)
            .digest('hex');
    }

    /**
     * Check if cache entry is still valid
     */
    isValid(entry) {
        return Date.now() - entry.timestamp < this.options.ttl;
    }

    /**
     * Get cached response if available
     */
    get(messages, model, provider) {
        const key = this.generateKey(messages, model);
        const entry = this.cache.get(key);
        if (entry && this.isValid(entry)) {
            entry.hits++;
            this.stats.hits++;
            this.stats.tokensSaved += entry.tokenCount;
            this.logger.debug(`Cache hit for ${model} (${entry.hits} hits, ${entry.tokenCount} tokens saved)`);
            return entry.response;
        }
        // Try similarity matching if enabled
        if (this.options.enableSimilarity) {
            const similar = this.findSimilar(messages, model, provider);
            if (similar) {
                this.stats.hits++;
                this.stats.tokensSaved += similar.tokenCount;
                this.logger.debug(`Similar cache hit for ${model} (${similar.tokenCount} tokens saved)`);
                return similar.response;
            }
        }
        this.stats.misses++;
        return null;
    }

    /**
     * Store response in cache
     */
    set(messages, response, model, provider, tokenCount) {
        // Enforce max size
        if (this.cache.size >= this.options.maxSize) {
            this.evictOldest();
        }
        const key = this.generateKey(messages, model);
        const entry = {
            key,
            response,
            messages,
            timestamp: Date.now(),
            tokenCount,
            model,
            provider,
            hits: 0
        };
        this.cache.set(key, entry);
        this.logger.debug(`Cached response for ${model} (${tokenCount} tokens)`);
    }

    /**
     * Find semantically similar cached entry
     */
    findSimilar(messages, model, provider) {
        // Simple implementation - can be enhanced with embeddings
        const targetLength = messages.length;
        for (const entry of this.cache.values()) {
            if (entry.model !== model || entry.provider !== provider)
                continue;
            if (!this.isValid(entry))
                continue;
            // Check if message count is similar
            if (Math.abs(entry.messages.length - targetLength) > 1)
                continue;
            // Check last message similarity (simple approach)
            const lastTarget = messages[messages.length - 1];
            const lastCached = entry.messages[entry.messages.length - 1];
            if (lastTarget.role === lastCached.role) {
                const similarity = this.calculateSimilarity(lastTarget.content, lastCached.content);
                if (similarity >= this.options.threshold) {
                    return entry;
                }
            }
        }
        return null;
    }

    /**
     * Calculate simple similarity between two strings
     */
    calculateSimilarity(a, b) {
        // Jaccard similarity over whitespace-separated tokens - can be replaced with better algorithms
        const setA = new Set(a.toLowerCase().split(/\s+/));
        const setB = new Set(b.toLowerCase().split(/\s+/));
        const intersection = new Set([...setA].filter(x => setB.has(x)));
        const union = new Set([...setA, ...setB]);
        return intersection.size / union.size;
    }

    /**
     * Evict oldest entries
     */
    evictOldest() {
        let oldest = null;
        let oldestKey = null;
        for (const [key, entry] of this.cache) {
            if (!oldest || entry.timestamp < oldest.timestamp) {
                oldest = entry;
                oldestKey = key;
            }
        }
        if (oldestKey) {
            this.cache.delete(oldestKey);
            this.logger.debug(`Evicted cache entry: ${oldestKey}`);
        }
    }

    /**
     * Clear expired entries
     */
    clearExpired() {
        const now = Date.now();
        let cleared = 0;
        for (const [key, entry] of this.cache) {
            if (now - entry.timestamp > this.options.ttl) {
                this.cache.delete(key);
                cleared++;
            }
        }
        if (cleared > 0) {
            this.logger.debug(`Cleared ${cleared} expired cache entries`);
        }
    }

    /**
     * Get cache statistics
     */
    getStats() {
        const totalEntries = this.cache.size;
        const avgHits = totalEntries > 0
            ? Array.from(this.cache.values()).reduce((sum, e) => sum + e.hits, 0) / totalEntries
            : 0;
        return {
            ...this.stats,
            totalEntries,
            avgHits,
            hitRate: this.stats.hits / (this.stats.hits + this.stats.misses) || 0,
            memorySizeEstimate: this.estimateMemorySize()
        };
    }

    /**
     * Estimate memory usage
     */
    estimateMemorySize() {
        let size = 0;
        for (const entry of this.cache.values()) {
            size += JSON.stringify(entry).length * 2; // Rough estimate (2 bytes per char)
        }
        return size;
    }

    /**
     * Clear all cache
     */
    clear() {
        this.cache.clear();
        this.logger.info('Cache cleared');
    }

    /**
     * Export cache for persistence
     */
    export() {
        return JSON.stringify({
            entries: Array.from(this.cache.entries()),
            stats: this.stats
        });
    }

    /**
     * Import cache from persistence
     */
    import(data) {
        try {
            const parsed = JSON.parse(data);
            this.cache = new Map(parsed.entries);
            this.stats = parsed.stats;
            this.clearExpired();
            this.logger.info(`Imported ${this.cache.size} cache entries`);
        }
        catch (error) {
            this.logger.error('Failed to import cache:', error);
        }
    }
}

/**
 * Anthropic-specific prompt caching
 * Implements Anthropic's beta caching feature
 */
export class AnthropicPromptCache {
    logger;
    cacheBreakpoints = new Map();

    constructor() {
        this.logger = new Logger('AnthropicPromptCache');
    }

    /**
     * Prepare messages with cache control for Anthropic API
     */
    prepareCachedMessages(messages) {
        const result = [];
        // Mark system message and initial context for caching
        for (let i = 0; i < messages.length; i++) {
            const msg = messages[i];
            const formatted = {
                // System prompts are folded into the message list as assistant turns
                role: msg.role === 'system' ? 'assistant' : msg.role,
                content: msg.content
            };
            // Add cache control to system messages and long early context.
            // Anthropic's cache_control flag belongs on a content block, so the
            // text is wrapped in a block when it is marked for caching.
            if (msg.role === 'system' || (i < 3 && msg.content.length > 1000)) {
                formatted.content = [
                    { type: 'text', text: msg.content, cache_control: { type: 'ephemeral' } }
                ];
            }
            result.push(formatted);
        }
        return result;
    }

    /**
     * Calculate potential token savings from caching
     */
    estimateSavings(messages, cachedTokens) {
        // Anthropic bills cache reads at roughly 10% of the base input-token
        // price, so each cached token saves about 90% of its normal cost
        const savings = cachedTokens * 0.9;
        this.logger.debug(`Estimated savings: ${savings} tokens (${cachedTokens} cached)`);
        return savings;
    }
}
//# sourceMappingURL=prompt-cache.js.map
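
A minimal usage sketch of the two classes above. The `callLLM()` helper, the `./prompt-cache.js` import path, and the `response.usage.total_tokens` field are illustrative assumptions, not part of this file:

// Usage sketch: wrap provider calls with PromptCache, and prepare
// Anthropic requests with AnthropicPromptCache before sending them
import { PromptCache, AnthropicPromptCache } from './prompt-cache.js';

const cache = new PromptCache({ ttl: 600000, enableSimilarity: true });
const anthropicCache = new AnthropicPromptCache();

async function cachedCompletion(messages, model, provider) {
    // Serve identical (or, with enableSimilarity, near-identical) prompts from cache
    const cached = cache.get(messages, model, provider);
    if (cached) return cached;

    // For Anthropic, mark system/long-context messages as cacheable server-side
    const prepared = provider === 'anthropic'
        ? anthropicCache.prepareCachedMessages(messages)
        : messages;

    const response = await callLLM(prepared, model, provider); // hypothetical helper
    cache.set(messages, response, model, provider, response.usage?.total_tokens ?? 0);
    return response;
}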