UNPKG

ai-functions

Version:

Core AI primitives for building intelligent applications

228 lines • 8.61 kB

JavaScript

/**
 * cacheMiddleware — content-addressable cache for `wrapLanguageModel`
 *
 * Implements the AI SDK cookbook's local-caching-middleware pattern
 * (https://ai-sdk.dev/cookbook/node/local-caching-middleware) on top of the
 * AI SDK 6 `LanguageModelV3Middleware` shape:
 *
 * - **Hit derivation:** content-hash of `{ prompt, modelId, responseFormat }`
 *   so a schema change (responseFormat.type === 'json' carries a `schema`
 *   JSONSchema7) invalidates the entry. Generation parameters (temperature,
 *   topP, etc.) are deliberately *not* part of the key for the eval-fixture
 *   use case — flipping temperature shouldn't blow up a 5x verify-time win.
 *   Callers who want strict keying should pass a custom `keyHash`.
 *
 * - **Stream support:** cached entries store the `LanguageModelV3StreamPart[]`
 *   array; `wrapStream` replays them via `simulateReadableStream` so consumers
 *   see the same chunked event sequence on a hit. (`wrapGenerate` is the
 *   common path; both share the same cache map.) Streams that emitted an
 *   `error` part are not cached.
 *
 * - **TTL:** 24h default, configurable via `ttlMs`. Entries past TTL are
 *   evicted on access (lazy expiry — no background timer). Entries without a
 *   usable `createdAt` are treated as expired.
 *
 * - **Pluggable store:** in-memory default (Map-backed); `'disk'` writes to
 *   a JSON file at `.cache/v3-eval-cache.json` for cross-process fixture
 *   sharing. Disk reads/writes are best-effort — IO failures fall through
 *   to the wrapped model. Timestamps that were stringified by the JSON
 *   round-trip are turned back into `Date` objects on a hit.
 *
 * - **Env gate:** honors `process.env.V3_EVAL_CACHE`. When unset/empty, the
 *   middleware short-circuits to a passthrough — useful for production where
 *   cache hits would be incorrect but the operator wants the same wrap chain.
 *   Set to `'1'` (or any truthy non-empty string) to enable.
 *
 * @packageDocumentation
 */
import { simulateReadableStream } from 'ai';
import { hashKey } from '../cache.js';

// ============================================================================
// Stores
// ============================================================================

/** Map-backed store; entries live for the lifetime of the middleware instance. */
class MemoryStore {
  map = new Map();

  get(key) {
    return this.map.get(key);
  }

  set(key, value) {
    this.map.set(key, value);
  }

  delete(key) {
    this.map.delete(key);
  }
}

/**
 * Resolve a Node builtin without `require`, for ES-module contexts where
 * `require` is not defined.
 *
 * @param {string} name - builtin module id, e.g. `'fs'`
 * @returns {object} the builtin module
 * @throws {Error} when the runtime exposes no `process.getBuiltinModule`
 */
function getNodeBuiltin(name) {
  const proc = globalThis.process;
  if (typeof proc?.getBuiltinModule === 'function') {
    const mod = proc.getBuiltinModule(name);
    if (mod !== undefined) {
      return mod;
    }
  }
  throw new Error(`Node builtin '${name}' is not available in this runtime`);
}

/** Lazily load `fs`; the `require` call stays static so bundlers can see it. */
function loadFs() {
  if (typeof require === 'function') {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    return require('fs');
  }
  return getNodeBuiltin('fs');
}

/** Lazily load `path`; the `require` call stays static so bundlers can see it. */
function loadPath() {
  if (typeof require === 'function') {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    return require('path');
  }
  return getNodeBuiltin('path');
}

/**
 * Disk-backed store. Best-effort — JSON parse / write errors fall through
 * silently so a corrupt cache file never blocks an LLM call. The whole map
 * is rewritten on each `set` (cheap for the eval-fixture use case which is
 * dominated by reads).
 */
class DiskStore {
  path;
  cache = null;

  /** @param {string} path - location of the JSON cache file */
  constructor(path) {
    this.path = path;
  }

  /**
   * Read the cache file once and memoize it as a Map. Any failure leaves the
   * (possibly empty) in-memory map in place.
   *
   * @returns {Map<string, unknown>} the in-memory view of the cache file
   */
  load() {
    if (this.cache !== null) {
      return this.cache;
    }
    this.cache = new Map();
    try {
      const fs = loadFs();
      if (fs.existsSync(this.path)) {
        const raw = fs.readFileSync(this.path, 'utf-8');
        const parsed = JSON.parse(raw);
        // Only a plain JSON object is a valid cache file; anything else
        // (null, array, scalar) is ignored rather than half-loaded.
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          for (const [k, v] of Object.entries(parsed)) {
            this.cache.set(k, v);
          }
        }
      }
    } catch {
      // best-effort: an unreadable or corrupt file behaves like an empty cache
    }
    return this.cache;
  }

  /** Rewrite the whole cache file from the in-memory map; failures are ignored. */
  flush() {
    if (this.cache === null) {
      return;
    }
    try {
      const fs = loadFs();
      const path = loadPath();
      const dir = path.dirname(this.path);
      if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true });
      }
      const obj = Object.fromEntries(this.cache);
      fs.writeFileSync(this.path, JSON.stringify(obj), 'utf-8');
    } catch {
      // best-effort: the in-memory map still serves this process
    }
  }

  get(key) {
    return this.load().get(key);
  }

  set(key, value) {
    this.load().set(key, value);
    this.flush();
  }

  delete(key) {
    this.load().delete(key);
    this.flush();
  }
}

// ============================================================================
// Helpers
// ============================================================================

const DEFAULT_TTL_MS = 24 * 60 * 60 * 1000;

/**
 * Default cache key: stable hash of prompt + model + responseFormat (which
 * carries the schema for object generation). Generation knobs are
 * deliberately excluded so the eval-fixture cache survives temperature tweaks.
 *
 * @param {object} params - call options passed to the wrapped model
 * @param {string} modelId - id of the wrapped model
 * @returns {*} whatever `hashKey` returns for the key material
 */
function defaultKeyHash(params, modelId) {
  return hashKey({
    prompt: params.prompt,
    modelId,
    responseFormat: params.responseFormat,
  });
}

/**
 * @returns {boolean} true when `V3_EVAL_CACHE` is a non-empty string; false
 *   when it is unset/empty or the runtime has no `process.env`
 */
function envGateEnabled() {
  const v = globalThis.process?.env?.['V3_EVAL_CACHE'];
  return typeof v === 'string' && v.length > 0;
}

/**
 * @param {{ createdAt?: number } | null | undefined} entry - cached entry
 * @param {number} ttlMs - maximum entry age in milliseconds
 * @returns {boolean} true when the entry is older than `ttlMs` or carries no
 *   finite numeric `createdAt` (e.g. a hand-edited or corrupt cache file)
 */
function isExpired(entry, ttlMs) {
  const createdAt = entry?.createdAt;
  if (typeof createdAt !== 'number' || !Number.isFinite(createdAt)) {
    return true;
  }
  return Date.now() - createdAt > ttlMs;
}

/**
 * Undo the JSON round-trip on a cached generate result: a string
 * `response.timestamp` becomes a `Date` again. Other values pass through.
 */
function reviveGenerateResult(result) {
  const timestamp = result?.response?.timestamp;
  if (typeof timestamp !== 'string') {
    return result;
  }
  return {
    ...result,
    response: { ...result.response, timestamp: new Date(timestamp) },
  };
}

/**
 * Undo the JSON round-trip on cached stream parts: `response-metadata` parts
 * with a string `timestamp` get a `Date` back. Other parts pass through.
 */
function reviveStreamChunks(chunks) {
  return chunks.map((part) =>
    part?.type === 'response-metadata' && typeof part.timestamp === 'string'
      ? { ...part, timestamp: new Date(part.timestamp) }
      : part,
  );
}

// ============================================================================
// Middleware
// ============================================================================

/**
 * Build a cache middleware for `wrapLanguageModel`. Wraps `doGenerate` and
 * `doStream`; on a hit replays the cached payload, on a miss invokes the
 * downstream model and stores the result.
 *
 * Composition note: install **before** budget/trace so cache hits don't
 * pay the downstream model cost (the trace/budget middleware still see the
 * payload via the wrapped result they observe in their own `wrapGenerate`).
 *
 * @param {object} [options]
 * @param {number} [options.ttlMs] - entry lifetime in ms (default 24h)
 * @param {Function} [options.keyHash] - `(params, modelId) => key`; defaults
 *   to a hash of prompt, modelId and responseFormat
 * @param {'memory' | 'disk' | object} [options.store] - `'memory'` (default),
 *   `'disk'`, or a custom object with synchronous `get`/`set`/`delete`
 * @param {string} [options.diskPath] - cache file for the `'disk'` store
 *   (default `.cache/v3-eval-cache.json`)
 * @param {boolean} [options.enabled] - overrides the `V3_EVAL_CACHE` env gate
 * @returns {object} a `specificationVersion: 'v3'` middleware with
 *   `wrapGenerate` and `wrapStream`
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { cacheMiddleware } from 'ai-functions'
 *
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: cacheMiddleware({ store: 'disk', ttlMs: 86_400_000 }),
 * })
 * ```
 */
export function cacheMiddleware(options = {}) {
  const ttlMs = options.ttlMs ?? DEFAULT_TTL_MS;
  const keyHash = options.keyHash ?? defaultKeyHash;
  const store =
    options.store === undefined || options.store === 'memory'
      ? new MemoryStore()
      : options.store === 'disk'
        ? new DiskStore(options.diskPath ?? '.cache/v3-eval-cache.json')
        : options.store;
  const enabled = options.enabled ?? envGateEnabled();

  // Fetch a live entry; expired or malformed entries are evicted and
  // reported as a miss.
  const lookup = (key) => {
    const entry = store.get(key);
    if (entry === undefined || entry === null) {
      return undefined;
    }
    if (isExpired(entry, ttlMs)) {
      store.delete(key);
      return undefined;
    }
    return entry;
  };

  return {
    specificationVersion: 'v3',

    async wrapGenerate({ doGenerate, params, model }) {
      if (!enabled) {
        return doGenerate();
      }
      const key = keyHash(params, model.modelId);
      const cached = lookup(key);
      if (cached?.generateResult !== undefined) {
        return reviveGenerateResult(cached.generateResult);
      }
      const result = await doGenerate();
      store.set(key, { generateResult: result, createdAt: Date.now() });
      return result;
    },

    async wrapStream({ doStream, params, model }) {
      if (!enabled) {
        return doStream();
      }
      const key = keyHash(params, model.modelId);
      const cached = lookup(key);
      if (cached?.streamChunks !== undefined) {
        // Replay cached chunks via simulateReadableStream so consumers
        // see the same async iteration shape as a fresh call.
        return {
          stream: simulateReadableStream({
            chunks: reviveStreamChunks(cached.streamChunks),
            initialDelayInMs: 0,
            chunkDelayInMs: 0,
          }),
        };
      }
      const result = await doStream();
      // Tee the stream: forward to caller, accumulate for cache.
      const chunks = [];
      let sawError = false;
      const transformedStream = result.stream.pipeThrough(
        new TransformStream({
          transform(chunk, controller) {
            if (chunk?.type === 'error') {
              sawError = true;
            }
            chunks.push(chunk);
            controller.enqueue(chunk);
          },
          flush() {
            // Only fully delivered, error-free streams are worth replaying;
            // caching an error part would pin the failure for the whole TTL.
            if (!sawError) {
              store.set(key, { streamChunks: chunks, createdAt: Date.now() });
            }
          },
        }),
      );
      return {
        ...result,
        stream: transformedStream,
      };
    },
  };
}
//# sourceMappingURL=cache.js.map