UNPKG

ai-functions

Version:

Core AI primitives for building intelligent applications

423 lines (345 loc) 14.6 kB
/**
 * Caching Strategies Example
 *
 * This example demonstrates intelligent caching for AI operations using ai-functions.
 * It shows how to:
 * - Use MemoryCache for general caching
 * - Cache generation results with GenerationCache
 * - Cache embeddings with EmbeddingCache
 * - Wrap functions with automatic caching
 *
 * @example
 * ```bash
 * ANTHROPIC_API_KEY=sk-... npx tsx examples/12-caching-strategies.ts
 * ```
 */

import {
  write,
  ai,
  list,
  configure,
  MemoryCache,
  GenerationCache,
  EmbeddingCache,
  withCache,
  hashKey,
  createCacheKey,
  type CacheEntry,
  type CacheStats,
} from '../src/index.js'

// ============================================================================
// Basic MemoryCache
// ============================================================================

/**
 * Walks through the MemoryCache calls used in this file: `set` (with and
 * without a per-entry `ttl`), `get`, `has`, `getStats`, `delete` and `clear`.
 * Every result is printed to the console; nothing is returned.
 */
async function memoryCacheExample(): Promise<void> {
  console.log('\n=== MemoryCache - Basic Usage ===\n')

  const cache = new MemoryCache({
    maxSize: 100, // Max 100 entries
    defaultTTL: 60000, // 1 minute TTL
  })

  // Store and retrieve values
  await cache.set('greeting', 'Hello, World!')
  await cache.set('number', 42, { ttl: 5000 }) // 5 second TTL
  await cache.set('user', { name: 'Alice', email: 'alice@example.com' }, { ttl: 30000 })

  console.log('Stored values:')
  console.log(` greeting: ${await cache.get('greeting')}`)
  console.log(` number: ${await cache.get('number')}`)
  console.log(` user: ${JSON.stringify(await cache.get('user'))}`)

  // Check if key exists
  console.log(`\n Has 'greeting': ${await cache.has('greeting')}`)
  console.log(` Has 'missing': ${await cache.has('missing')}`)

  // Get stats
  const stats = cache.getStats()
  console.log('\nCache Stats:')
  console.log(` Size: ${stats.size}`)
  console.log(` Hits: ${stats.hits}`)
  console.log(` Misses: ${stats.misses}`)
  // hitRate may be absent/zero before any lookups; fall back to 0 for display.
  console.log(` Hit Rate: ${((stats.hitRate || 0) * 100).toFixed(1)}%`)

  // Clear specific key
  await cache.delete('number')
  console.log(`\n After delete, has 'number': ${await cache.has('number')}`)

  // Clear all
  await cache.clear()
  console.log(` After clear, size: ${cache.getStats().size}`)
}

// ============================================================================
// GenerationCache
// ============================================================================

/**
 * Shows a check-then-generate flow with GenerationCache: look the key up,
 * call `write` only when the lookup returns nothing, store the result, then
 * look up the same key and a key that differs only in `model`.
 */
async function generationCacheExample(): Promise<void> {
  console.log('\n=== GenerationCache - AI Response Caching ===\n')

  const cache = new GenerationCache({
    defaultTTL: 3600000, // 1 hour
    maxSize: 1000,
  })

  // Cache key is based on prompt and model
  const cacheKey1 = { prompt: 'What is TypeScript?', model: 'sonnet' }
  const cacheKey2 = { prompt: 'What is TypeScript?', model: 'gpt-4o' }

  // Check cache before generating
  let result = await cache.get(cacheKey1)
  if (!result) {
    console.log('Cache miss - generating response...')
    result = await write`What is TypeScript? Answer in one sentence.`

    // Store in cache
    await cache.set(cacheKey1, result)
    console.log(`Cached response: "${result.substring(0, 50)}..."`)
  } else {
    console.log(`Cache hit: "${result.substring(0, 50)}..."`)
  }

  // Same prompt again - should hit cache
  console.log('\nSecond request with same prompt...')
  const cached = await cache.get(cacheKey1)
  console.log(cached ? 'Cache HIT' : 'Cache MISS')

  // Different model - different cache entry
  console.log('\nSame prompt, different model...')
  const differentModel = await cache.get(cacheKey2)
  console.log(differentModel ? 'Cache HIT' : 'Cache MISS (different model)')

  // Get usage metrics
  console.log('\nGeneration Cache Metrics:')
  const stats = cache.getStats()
  console.log(` Entries: ${stats.size}`)
  console.log(` Hits: ${stats.hits}`)
  console.log(` Misses: ${stats.misses}`)
}

// ============================================================================
// EmbeddingCache with Batch Support
// ============================================================================

/**
 * Shows batch lookups with EmbeddingCache: `getMany` reports hits and misses,
 * embeddings for the misses are produced by a local stand-in generator and
 * stored with `setMany`, then the same batch is looked up a second time.
 */
async function embeddingCacheExample(): Promise<void> {
  console.log('\n=== EmbeddingCache - Embedding Storage ===\n')

  const cache = new EmbeddingCache({
    defaultTTL: 86400000, // 24 hours - embeddings change less frequently
    maxSize: 10000, // Store up to 10k embeddings
  })

  // Simulate embedding generation: a deterministic 128-value vector derived
  // from the sum of the text's UTF-16 code units (not a real embedding).
  const generateEmbedding = (text: string): number[] => {
    const hash = text.split('').reduce((acc, char) => acc + char.charCodeAt(0), 0)
    return Array.from({ length: 128 }, (_, i) => Math.sin(hash + i) / 2)
  }

  // Batch embedding with cache
  const texts = [
    'The quick brown fox',
    'Jumps over the lazy dog',
    'Hello world',
    'The quick brown fox', // Duplicate
  ]

  console.log('Batch embedding with cache...\n')

  // First, check which are already cached
  const cacheResults = await cache.getMany(texts, { model: 'text-embedding-3-small' })
  console.log(` Hits: ${cacheResults.hits.length}`)
  console.log(` Misses: ${cacheResults.misses.length}`)

  // Generate embeddings for misses (keyed by text, so duplicates collapse)
  const newEmbeddings: Record<string, number[]> = {}
  for (const text of cacheResults.misses) {
    console.log(` Generating embedding for: "${text.substring(0, 30)}..."`)
    newEmbeddings[text] = generateEmbedding(text)
  }

  // Store new embeddings
  await cache.setMany(newEmbeddings, { model: 'text-embedding-3-small' })
  console.log(`\n Cached ${Object.keys(newEmbeddings).length} new embeddings`)

  // Second batch request - should hit more
  console.log('\nSecond batch request...')
  const secondResults = await cache.getMany(texts, { model: 'text-embedding-3-small' })
  console.log(` Hits: ${secondResults.hits.length}`)
  console.log(` Misses: ${secondResults.misses.length}`)

  // Calculate savings
  const savingsPercent = (secondResults.hits.length / texts.length) * 100
  console.log(
    `\n Cache efficiency: ${savingsPercent.toFixed(0)}% (${secondResults.hits.length}/${texts.length} from cache)`
  )
}

// ============================================================================
// withCache Function Wrapper
// ============================================================================

/**
 * Wraps an async function with `withCache` and calls the wrapper three times:
 * twice with the same input and once with a different one. The wrapped
 * function logs when it actually runs, so repeated work is visible in output.
 */
async function withCacheExample(): Promise<void> {
  console.log('\n=== withCache - Automatic Function Caching ===\n')

  const cache = new MemoryCache({ maxSize: 100 })

  // Create a cached version of any function
  const generateSummary = async (text: string): Promise<string> => {
    console.log(' [Generating summary...]')
    // Simulate AI call
    return `Summary of: ${text.substring(0, 30)}...`
  }

  // Wrap with caching
  const cachedSummary = withCache(cache, generateSummary, {
    keyFn: (text: string) => `summary:${hashKey(text)}`,
    ttl: 60000,
  })

  // First call - generates
  console.log('First call:')
  const result1 = await cachedSummary('This is a long article about AI and machine learning...')
  console.log(` Result: ${result1}`)

  // Second call with same input - from cache
  console.log('\nSecond call (same input):')
  const result2 = await cachedSummary('This is a long article about AI and machine learning...')
  console.log(` Result: ${result2}`)

  // Different input - generates
  console.log('\nThird call (different input):')
  const result3 = await cachedSummary('A completely different article about web development...')
  console.log(` Result: ${result3}`)

  console.log('\nCache stats:', cache.getStats())
}

// ============================================================================
// Cache Key Strategies
// ============================================================================

/**
 * Prints four ways of deriving cache keys: a plain `hashKey` of the prompt,
 * a composite key from `createCacheKey`, a hash of a normalized prompt, and
 * a description (no code) of embedding-based semantic matching.
 */
async function cacheKeyStrategiesExample(): Promise<void> {
  console.log('\n=== Cache Key Strategies ===\n')

  // Strategy 1: Simple string hash
  const prompt1 = 'What is TypeScript?'
  const key1 = hashKey(prompt1)
  console.log(`Simple hash:`)
  console.log(` Input: "${prompt1}"`)
  console.log(` Key: ${key1}`)

  // Strategy 2: Composite key with model
  const key2 = createCacheKey({
    prompt: prompt1,
    model: 'sonnet',
    temperature: 0.7,
  })
  console.log(`\nComposite key:`)
  console.log(` Input: { prompt, model, temperature }`)
  console.log(` Key: ${key2}`)

  // Strategy 3: Normalized prompt (ignore whitespace differences)
  // Trims, collapses whitespace runs to one space, and lowercases.
  const normalizePrompt = (p: string): string => p.trim().replace(/\s+/g, ' ').toLowerCase()
  const prompt2 = ' What is TypeScript? '
  const sameKey = hashKey(normalizePrompt(prompt2)) === hashKey(normalizePrompt(prompt1))
  console.log(`\nNormalized prompt:`)
  console.log(` Original: "${prompt2}"`)
  console.log(` Normalized: "${normalizePrompt(prompt2)}"`)
  console.log(` Same key as prompt1: ${sameKey}`)

  // Strategy 4: Semantic caching key (for similar prompts)
  // This would use embeddings to find similar cached entries
  console.log(`\nSemantic caching (conceptual):`)
  console.log(` "What is TS?" would match "What is TypeScript?"`)
  console.log(` Requires embedding-based similarity lookup`)
}

// ============================================================================
// Cache Invalidation Patterns
// ============================================================================

/**
 * Demonstrates invalidation approaches against a MemoryCache: a 1-second TTL
 * entry read before and after a 1.1-second wait, an explicit `delete`, and
 * two patterns that are only described in output (pattern-based deletion and
 * version-prefixed keys).
 */
async function cacheInvalidationExample(): Promise<void> {
  console.log('\n=== Cache Invalidation Patterns ===\n')

  const cache = new MemoryCache({ maxSize: 100 })

  // Pattern 1: TTL-based expiration
  console.log('1. TTL-based expiration:')
  await cache.set('short-lived', 'data', { ttl: 1000 }) // 1 second
  console.log(` Set with 1s TTL: ${await cache.get('short-lived')}`)
  // Wait slightly longer than the TTL so the next read happens after expiry.
  await new Promise((r) => setTimeout(r, 1100))
  console.log(` After 1.1s: ${await cache.get('short-lived')} (expired)`)

  // Pattern 2: Manual invalidation
  console.log('\n2. Manual invalidation:')
  await cache.set('user:123', { name: 'Alice' })
  console.log(` Before invalidation: ${JSON.stringify(await cache.get('user:123'))}`)
  await cache.delete('user:123')
  console.log(` After invalidation: ${await cache.get('user:123')}`)

  // Pattern 3: Pattern-based invalidation
  console.log('\n3. Pattern-based invalidation (conceptual):')
  console.log(` cache.deletePattern('user:*') // Delete all user entries`)
  console.log(` Useful when a user's data changes globally`)

  // Pattern 4: Version-based keys
  console.log('\n4. Version-based keys:')
  const version = 'v2'
  const versionedKey = `${version}:prompt:${hashKey('What is TypeScript?')}`
  console.log(` Key includes version: ${versionedKey}`)
  console.log(` Incrementing version invalidates all old entries`)
}

// ============================================================================
// Cost Savings Calculation
// ============================================================================

/**
 * Prints a back-of-the-envelope cost comparison with and without caching,
 * using fixed, illustrative request counts, token counts and per-token prices.
 * No cache or API is touched; this is pure arithmetic.
 */
async function costSavingsExample(): Promise<void> {
  console.log('\n=== Cost Savings from Caching ===\n')

  // Simulate request patterns
  const totalRequests = 1000
  const uniquePrompts = 100 // Reported for context only; the cost math below is driven by hitRate
  const hitRate = 0.7 // 70% of requests are repeats

  // Pricing assumptions (per 1M tokens)
  const inputCost = 3.0 // $3 per 1M input tokens
  const outputCost = 15.0 // $15 per 1M output tokens
  const avgInputTokens = 100
  const avgOutputTokens = 200

  // Calculate without caching
  const costPerRequest = (avgInputTokens * inputCost + avgOutputTokens * outputCost) / 1000000
  const totalCostNoCache = totalRequests * costPerRequest

  // Calculate with caching (only pay for requests that miss the cache)
  const cachedRequests = totalRequests * hitRate
  const uncachedRequests = totalRequests - cachedRequests
  const totalCostWithCache = uncachedRequests * costPerRequest

  const savings = totalCostNoCache - totalCostWithCache
  const savingsPercent = (savings / totalCostNoCache) * 100

  console.log('Scenario:')
  console.log(` Total requests: ${totalRequests}`)
  console.log(` Unique prompts: ${uniquePrompts}`)
  console.log(` Cache hit rate: ${(hitRate * 100).toFixed(0)}%`)

  console.log(`\nCost Analysis:`)
  console.log(` Without caching: $${totalCostNoCache.toFixed(4)}`)
  console.log(` With caching: $${totalCostWithCache.toFixed(4)}`)
  console.log(` Savings: $${savings.toFixed(4)} (${savingsPercent.toFixed(0)}%)`)

  console.log(`\nRecommendation:`)
  console.log(` For ${totalRequests} requests/day at ${(hitRate * 100).toFixed(0)}% hit rate:`)
  console.log(` Monthly savings: ~$${(savings * 30).toFixed(2)}`)
}

// ============================================================================
// Production Recommendations
// ============================================================================

/**
 * Prints a static checklist of caching recommendations (storage, TTLs, key
 * design, sizing, invalidation, monitoring). Purely informational output.
 */
function showProductionRecommendations(): void {
  console.log('\n=== Production Caching Recommendations ===\n')

  console.log(`
1. CACHE STORAGE SELECTION
   - Development: MemoryCache (simple, no setup)
   - Production: Redis/Memcached (persistent, distributed)
   - Edge: Cloudflare KV, Durable Objects

2. TTL STRATEGIES
   - Embeddings: 24-48 hours (stable, expensive)
   - Generations: 1-4 hours (may need freshness)
   - Lists/Classifications: 15-60 minutes (volatile)
   - User-specific: 5-15 minutes (personalized)

3. KEY DESIGN
   - Include: prompt hash, model, temperature
   - Normalize: whitespace, case (for similar queries)
   - Version: include cache version for easy invalidation

4. SIZE MANAGEMENT
   - Set maxSize based on memory budget
   - Monitor hit rates to optimize size
   - Use LRU eviction for unpredictable access patterns

5. INVALIDATION
   - TTL for automatic expiration
   - Manual invalidation for data changes
   - Version bumps for schema changes

6. MONITORING
   - Track hit/miss rates
   - Monitor cache size over time
   - Alert on low hit rates
   - Log cache key patterns for optimization
`)
}

// ============================================================================
// Main
// ============================================================================

/**
 * Configures the default model/provider, then runs every example in order.
 * The examples run sequentially so their console output does not interleave.
 */
async function main(): Promise<void> {
  console.log('\n=== Caching Strategies Example ===')

  configure({
    model: 'sonnet',
    provider: 'anthropic',
  })

  await memoryCacheExample()
  await generationCacheExample()
  await embeddingCacheExample()
  await withCacheExample()
  await cacheKeyStrategiesExample()
  await cacheInvalidationExample()
  await costSavingsExample()
  showProductionRecommendations()
}

main()
  .then(() => {
    console.log('\n=== Example Complete ===\n')
    process.exit(0)
  })
  .catch((error: unknown) => {
    // A rejection is not guaranteed to be an Error; narrow before reading .message
    // so non-Error values are still reported instead of printing `undefined`.
    const message = error instanceof Error ? error.message : String(error)
    console.error('\nError:', message)
    process.exit(1)
  })