ai-functions

Version:

Core AI primitives for building intelligent applications

103 lines • 4.58 kB

TypeScript

/**
 * cacheMiddleware — content-addressable cache for `wrapLanguageModel`
 *
 * Implements the AI SDK cookbook's local-caching-middleware pattern
 * (https://ai-sdk.dev/cookbook/node/local-caching-middleware) on top of the
 * AI SDK 6 `LanguageModelV3Middleware` shape:
 *
 * - **Hit derivation:** the key is a content hash of
 *   `{ prompt, modelId, responseFormat }`, so a schema change
 *   (`responseFormat.type === 'json'` carries a `schema` JSONSchema7)
 *   invalidates the entry. Generation parameters (temperature, topP, etc.)
 *   are deliberately *not* part of the key for the eval-fixture use case —
 *   flipping temperature shouldn't blow up a 5x verify-time win. Callers who
 *   want strict keying should pass a custom `keyHash`.
 *
 * - **Stream support:** cached entries store the
 *   `LanguageModelV3StreamPart[]` array; `wrapStream` replays them via
 *   `simulateReadableStream` so consumers see the same chunked event
 *   sequence on a hit. (`wrapGenerate` is the common path; both share the
 *   same cache map.)
 *
 * - **TTL:** 24h by default, configurable via `ttlMs`. Entries past their
 *   TTL are evicted on access (lazy expiry — no background timer).
 *
 * - **Pluggable store:** the default is in-memory (Map-backed); `'disk'`
 *   writes to a JSON file at `.cache/v3-eval-cache.json` for cross-process
 *   fixture sharing. Disk reads/writes are best-effort — IO failures fall
 *   through to the wrapped model.
 *
 * - **Env gate:** honors `process.env.V3_EVAL_CACHE`. When it is unset or
 *   empty, the middleware short-circuits to a passthrough — useful for
 *   production where cache hits would be incorrect but the operator wants
 *   the same wrap chain. Set it to `'1'` (or any truthy non-empty string)
 *   to enable caching.
 *
 * @packageDocumentation
 */
import type {
  LanguageModelV3CallOptions,
  LanguageModelV3GenerateResult,
  LanguageModelV3Middleware,
  LanguageModelV3StreamPart,
} from '@ai-sdk/provider';

/**
 * A single cached payload. The generate result and the stream chunks for a
 * request live under the same key, so either field may be missing.
 */
interface CacheEntry {
  /**
   * Result captured from `doGenerate`. Absent if the entry came from a
   * stream call.
   */
  generateResult?: LanguageModelV3GenerateResult;

  /**
   * Stream chunks captured from `doStream` (replayed via
   * `simulateReadableStream`).
   */
  streamChunks?: LanguageModelV3StreamPart[];

  /** Insertion time in epoch milliseconds — drives TTL eviction. */
  createdAt: number;
}

/**
 * Pluggable backing store for cached LLM results.
 *
 * All three operations are synchronous in this contract: `get` returns the
 * entry (or `undefined`) directly rather than a promise.
 */
export interface CacheMiddlewareStore {
  /** Returns the entry stored under `key`, or `undefined` if there is none. */
  get(key: string): CacheEntry | undefined;

  /** Stores `value` under `key`. */
  set(key: string, value: CacheEntry): void;

  /** Removes whatever is stored under `key`. */
  delete(key: string): void;
}

/** Options accepted by {@link cacheMiddleware}. */
export interface CacheMiddlewareOptions {
  /**
   * Cache backend. `'memory'` uses a process-local Map; `'disk'` writes to
   * `.cache/v3-eval-cache.json` for cross-process fixture sharing. A custom
   * {@link CacheMiddlewareStore} can be passed instead.
   *
   * @default 'memory'
   */
  store?: 'memory' | 'disk' | CacheMiddlewareStore;

  /**
   * Time-to-live in milliseconds. Entries older than `ttlMs` are evicted on
   * access.
   *
   * @default 86_400_000 (24h)
   */
  ttlMs?: number;

  /**
   * Custom hash function for cache keys. Defaults to a stable hash of
   * `{ prompt, modelId, responseFormat }`.
   */
  keyHash?: (params: LanguageModelV3CallOptions, modelId: string) => string;

  /**
   * Optional override for the env gate. When `false`, the middleware acts
   * as a passthrough regardless of `V3_EVAL_CACHE`. When `true`, it always
   * caches. Defaults to a truthy-check of `process.env.V3_EVAL_CACHE`.
   */
  enabled?: boolean;

  /**
   * Optional custom path for the disk store (defaults to
   * `.cache/v3-eval-cache.json`).
   */
  diskPath?: string;
}

/**
 * Builds a cache middleware for `wrapLanguageModel`. It wraps `doGenerate`
 * and `doStream`: on a hit the cached payload is replayed, on a miss the
 * downstream model is invoked and its result is stored.
 *
 * Composition note: install **before** budget/trace so cache hits don't
 * pay the downstream model cost (the trace/budget middleware still see the
 * payload via the wrapped result they observe in their own `wrapGenerate`).
 *
 * @param options - Optional store, TTL, key-hash, env-gate override and disk
 *   path settings; see {@link CacheMiddlewareOptions}.
 * @returns A `LanguageModelV3Middleware` to pass to `wrapLanguageModel`.
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { cacheMiddleware } from 'ai-functions'
 *
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: cacheMiddleware({ store: 'disk', ttlMs: 86_400_000 }),
 * })
 * ```
 */
export declare function cacheMiddleware(
  options?: CacheMiddlewareOptions,
): LanguageModelV3Middleware;

export {};
//# sourceMappingURL=cache.d.ts.map