ai-functions

/**
 * cacheMiddleware — content-addressable cache for `wrapLanguageModel`
 *
 * Implements the AI SDK cookbook's local-caching-middleware pattern
 * (https://ai-sdk.dev/cookbook/node/local-caching-middleware) on top of the
 * AI SDK 6 `LanguageModelV3Middleware` shape:
 *
 * - **Hit derivation:** content-hash of `{ prompt, modelId, responseFormat }`
 *   so a schema change (responseFormat.type === 'json' carries a `schema`
 *   JSONSchema7) invalidates the entry. Generation parameters (temperature,
 *   topP, etc.) are deliberately *not* part of the key for the eval-fixture
 *   use case — flipping temperature shouldn't blow up a 5x verify-time win.
 *   Callers who want strict keying should pass a custom `keyHash`.
 *
 * - **Stream support:** cached entries store the `LanguageModelV3StreamPart[]`
 *   array; `wrapStream` replays them via `simulateReadableStream` so consumers
 *   see the same chunked event sequence on a hit. (`wrapGenerate` is the
 *   common path; both share the same cache map.) A generate result and a
 *   stream capture for the same key live side by side in one entry — storing
 *   one never discards the other. Streams that contained an `error` part are
 *   not cached.
 *
 * - **TTL:** 24h default, configurable via `ttlMs`. Entries past TTL are
 *   evicted on access (lazy expiry — no background timer).
 *
 * - **Pluggable store:** in-memory default (Map-backed); `'disk'` writes to
 *   a JSON file at `.cache/v3-eval-cache.json` for cross-process fixture
 *   sharing. Disk reads/writes are best-effort — IO failures fall through
 *   to the wrapped model.
 *
 * - **Env gate:** honors `process.env.V3_EVAL_CACHE`. When unset/empty, the
 *   middleware short-circuits to a passthrough — useful for production where
 *   cache hits would be incorrect but the operator wants the same wrap chain.
 *   Set to `'1'` (or any non-empty string) to enable.
 *
 * @packageDocumentation
 */

import { simulateReadableStream } from 'ai'
import type {
  LanguageModelV3CallOptions,
  LanguageModelV3GenerateResult,
  LanguageModelV3Middleware,
  LanguageModelV3StreamPart,
  LanguageModelV3StreamResult,
} from '@ai-sdk/provider'
import { hashKey } from '../cache.js'

// ============================================================================
// Types
// ============================================================================

/** Cached payload — both generate result and stream chunks under one key. */
interface CacheEntry {
  /** Result captured from `doGenerate`. Absent if the entry came from a stream call. */
  generateResult?: LanguageModelV3GenerateResult
  /** Stream chunks captured from `doStream` (replayed via simulateReadableStream). */
  streamChunks?: LanguageModelV3StreamPart[]
  /** Insert epoch ms — drives TTL eviction. */
  createdAt: number
}

/** Pluggable cache store for cached LLM results. */
export interface CacheMiddlewareStore {
  get(key: string): CacheEntry | undefined
  set(key: string, value: CacheEntry): void
  delete(key: string): void
}

/** Options for {@link cacheMiddleware}. */
export interface CacheMiddlewareOptions {
  /**
   * Cache backend. `'memory'` uses a process-local Map; `'disk'` writes to
   * `.cache/v3-eval-cache.json` for cross-process fixture sharing. A custom
   * {@link CacheMiddlewareStore} can be passed instead.
   *
   * @default 'memory'
   */
  store?: 'memory' | 'disk' | CacheMiddlewareStore
  /**
   * TTL in milliseconds. Entries older than `ttlMs` are evicted on access.
   *
   * @default 86_400_000 (24h)
   */
  ttlMs?: number
  /**
   * Custom hash function for cache keys. Defaults to a stable hash of
   * `{ prompt, modelId, responseFormat }`.
   */
  keyHash?: (params: LanguageModelV3CallOptions, modelId: string) => string
  /**
   * Optional override for the env gate. When `false`, the middleware acts
   * as a passthrough regardless of `V3_EVAL_CACHE`. When `true`, always
   * caches. Defaults to `process.env.V3_EVAL_CACHE` truthy-check.
   */
  enabled?: boolean
  /** Optional custom path for the disk store (defaults to `.cache/v3-eval-cache.json`). */
  diskPath?: string
}

// ============================================================================
// Stores
// ============================================================================

/** Process-local store backed by a plain `Map`. */
class MemoryStore implements CacheMiddlewareStore {
  private readonly map: Map<string, CacheEntry> = new Map()

  get(key: string): CacheEntry | undefined {
    return this.map.get(key)
  }

  set(key: string, value: CacheEntry): void {
    this.map.set(key, value)
  }

  delete(key: string): void {
    this.map.delete(key)
  }
}

/**
 * Restore `Date` values that JSON serialization flattened to ISO strings.
 *
 * Only the two timestamp locations handled by the AI SDK caching cookbook are
 * revived: `generateResult.response.timestamp` and the `timestamp` of
 * `response-metadata` stream parts. The entry is mutated in place and returned.
 */
function reviveDates(entry: CacheEntry): CacheEntry {
  const response = entry.generateResult?.response
  if (typeof response === 'object' && response !== null) {
    // After a JSON round-trip the runtime value is a string even though the
    // static type says `Date`; widen to `unknown` to check it honestly.
    const stamp: unknown = response.timestamp
    if (typeof stamp === 'string') response.timestamp = new Date(stamp)
  }

  if (Array.isArray(entry.streamChunks)) {
    for (const chunk of entry.streamChunks) {
      if (typeof chunk !== 'object' || chunk === null) continue
      if (chunk.type === 'response-metadata') {
        const stamp: unknown = chunk.timestamp
        if (typeof stamp === 'string') chunk.timestamp = new Date(stamp)
      }
    }
  }

  return entry
}

/**
 * Disk-backed store. Best-effort — JSON parse / write errors fall through
 * silently so a corrupt cache file never blocks an LLM call. The whole map
 * is rewritten on each `set` (cheap for the eval-fixture use case which is
 * dominated by reads).
 *
 * NOTE(review): `require` is only defined when this module is loaded as
 * CommonJS. Under native ESM the resulting ReferenceError would be swallowed
 * by the `catch` blocks below and the store would silently behave as
 * memory-only — confirm against the package's build target.
 */
class DiskStore implements CacheMiddlewareStore {
  private readonly path: string
  private cache: Map<string, CacheEntry> | null = null

  constructor(path: string) {
    this.path = path
  }

  /** Lazily read the cache file once; later calls reuse the in-memory map. */
  private load(): Map<string, CacheEntry> {
    if (this.cache !== null) return this.cache
    this.cache = new Map()
    try {
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      const fs = require('fs') as typeof import('fs')
      if (fs.existsSync(this.path)) {
        const raw = fs.readFileSync(this.path, 'utf-8')
        const parsed = JSON.parse(raw) as Record<string, CacheEntry>
        for (const [k, v] of Object.entries(parsed)) {
          // Skip values that cannot be entries (hand-edited or corrupt file)
          // instead of aborting the whole load on the first bad one.
          if (typeof v !== 'object' || v === null) continue
          this.cache.set(k, reviveDates(v))
        }
      }
    } catch {
      // best-effort
    }
    return this.cache
  }

  /** Rewrite the whole cache file from the in-memory map. */
  private flush(): void {
    if (this.cache === null) return
    try {
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      const fs = require('fs') as typeof import('fs')
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      const path = require('path') as typeof import('path')
      const dir = path.dirname(this.path)
      if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true })
      }
      const obj = Object.fromEntries(this.cache)
      fs.writeFileSync(this.path, JSON.stringify(obj), 'utf-8')
    } catch {
      // best-effort
    }
  }

  get(key: string): CacheEntry | undefined {
    return this.load().get(key)
  }

  set(key: string, value: CacheEntry): void {
    this.load().set(key, value)
    this.flush()
  }

  delete(key: string): void {
    this.load().delete(key)
    this.flush()
  }
}

// ============================================================================
// Helpers
// ============================================================================

const DEFAULT_TTL_MS = 24 * 60 * 60 * 1000

/** Default cache key: hash of prompt, model id and response format. */
function defaultKeyHash(params: LanguageModelV3CallOptions, modelId: string): string {
  // Stable hash of prompt + model + responseFormat (which carries the
  // schema for object generation). Generation knobs are deliberately
  // excluded so the eval-fixture cache survives temperature tweaks.
  return hashKey({
    prompt: params.prompt,
    modelId,
    responseFormat: params.responseFormat,
  })
}

/** True when `V3_EVAL_CACHE` is set to any non-empty string. */
function envGateEnabled(): boolean {
  const v = process.env['V3_EVAL_CACHE']
  return typeof v === 'string' && v.length > 0
}

/**
 * True when the entry is older than `ttlMs`. An entry whose `createdAt` is
 * not a finite number (e.g. a malformed disk record) counts as expired so it
 * cannot live forever.
 */
function isExpired(entry: CacheEntry, ttlMs: number): boolean {
  if (!Number.isFinite(entry.createdAt)) return true
  return Date.now() - entry.createdAt > ttlMs
}

/**
 * Look up `key`, evicting the entry if it is past its TTL.
 *
 * @returns the live entry, or `undefined` on a miss or after eviction.
 */
function readLive(store: CacheMiddlewareStore, key: string, ttlMs: number): CacheEntry | undefined {
  const entry = store.get(key)
  if (entry === undefined) return undefined
  if (isExpired(entry, ttlMs)) {
    store.delete(key)
    return undefined
  }
  return entry
}

/**
 * Store `payload` under `key`, merging it into any live entry so a generate
 * result and a stream capture for the same key coexist.
 *
 * When merging, the existing `createdAt` is kept: the combined entry expires
 * when its oldest payload would have, so a merge never extends a payload's
 * lifetime beyond `ttlMs`.
 */
function writePayload(
  store: CacheMiddlewareStore,
  key: string,
  ttlMs: number,
  payload: Omit<CacheEntry, 'createdAt'>
): void {
  const existing = store.get(key)
  const live = existing !== undefined && !isExpired(existing, ttlMs) ? existing : undefined
  store.set(key, {
    ...live,
    ...payload,
    createdAt: live?.createdAt ?? Date.now(),
  })
}

// ============================================================================
// Middleware
// ============================================================================

/**
 * Build a cache middleware for `wrapLanguageModel`. Wraps `doGenerate` and
 * `doStream`; on a hit replays the cached payload, on a miss invokes the
 * downstream model and stores the result.
 *
 * Composition note: install **before** budget/trace so cache hits don't
 * pay the downstream model cost (the trace/budget middleware still see the
 * payload via the wrapped result they observe in their own `wrapGenerate`).
 *
 * @param options - Store, TTL, key-hash and enable-gate configuration; see
 *   {@link CacheMiddlewareOptions}. The enable gate is evaluated once, when
 *   the middleware is built.
 * @returns A `LanguageModelV3Middleware` with `wrapGenerate` and `wrapStream`.
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { cacheMiddleware } from 'ai-functions'
 *
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: cacheMiddleware({ store: 'disk', ttlMs: 86_400_000 }),
 * })
 * ```
 */
export function cacheMiddleware(options: CacheMiddlewareOptions = {}): LanguageModelV3Middleware {
  const ttlMs = options.ttlMs ?? DEFAULT_TTL_MS
  const keyHash = options.keyHash ?? defaultKeyHash
  const store: CacheMiddlewareStore =
    options.store === undefined || options.store === 'memory'
      ? new MemoryStore()
      : options.store === 'disk'
        ? new DiskStore(options.diskPath ?? '.cache/v3-eval-cache.json')
        : options.store
  const enabled = options.enabled ?? envGateEnabled()

  return {
    specificationVersion: 'v3',

    async wrapGenerate({ doGenerate, params, model }) {
      if (!enabled) return doGenerate()

      const key = keyHash(params, model.modelId)
      const cached = readLive(store, key, ttlMs)
      if (cached?.generateResult !== undefined) {
        return cached.generateResult
      }

      const result = await doGenerate()
      // Merge rather than overwrite: the entry may already hold stream chunks
      // captured by `wrapStream` for the same key.
      writePayload(store, key, ttlMs, { generateResult: result })
      return result
    },

    async wrapStream({ doStream, params, model }) {
      if (!enabled) return doStream()

      const key = keyHash(params, model.modelId)
      const cached = readLive(store, key, ttlMs)
      if (cached?.streamChunks !== undefined) {
        // Replay cached chunks via simulateReadableStream so consumers
        // see the same async iteration shape as a fresh call.
        const replay: LanguageModelV3StreamResult = {
          stream: simulateReadableStream<LanguageModelV3StreamPart>({
            chunks: cached.streamChunks,
            initialDelayInMs: 0,
            chunkDelayInMs: 0,
          }),
        }
        return replay
      }

      const result = await doStream()

      // Tee the stream: forward to caller, accumulate for cache.
      const chunks: LanguageModelV3StreamPart[] = []
      let sawError = false
      const transformedStream = result.stream.pipeThrough(
        new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({
          transform(chunk, controller) {
            if (chunk.type === 'error') sawError = true
            chunks.push(chunk)
            controller.enqueue(chunk)
          },
          flush() {
            // `flush` only runs when the source closes normally. A stream
            // that reported an in-band error part is still not worth
            // replaying, so leave the cache untouched in that case.
            if (sawError) return
            // Merge rather than overwrite: the entry may already hold a
            // generate result captured by `wrapGenerate` for the same key.
            writePayload(store, key, ttlMs, { streamChunks: chunks })
          },
        })
      )

      return {
        ...result,
        stream: transformedStream,
      }
    },
  }
}