// ai-functions
// Version:
// Core AI primitives for building intelligent applications
// 103 lines • 4.58 kB
// TypeScript
/**
* cacheMiddleware — content-addressable cache for `wrapLanguageModel`
*
* Implements the AI SDK cookbook's local-caching-middleware pattern
* (https://ai-sdk.dev/cookbook/node/local-caching-middleware) on top of the
* AI SDK 6 `LanguageModelV3Middleware` shape:
*
* - **Hit derivation:** content-hash of `{ prompt, modelId, responseFormat }`
* so a schema change (responseFormat.type === 'json' carries a `schema`
* JSONSchema7) invalidates the entry. Generation parameters (temperature,
* topP, etc.) are deliberately *not* part of the key for the eval-fixture
* use case — flipping temperature shouldn't blow up a 5x verify-time win.
* Callers who want strict keying should pass a custom `keyHash`.
*
* - **Stream support:** cached entries store the `LanguageModelV3StreamPart[]`
* array; `wrapStream` replays them via `simulateReadableStream` so consumers
* see the same chunked event sequence on a hit. (`wrapGenerate` is the
* common path; both share the same cache map.)
*
* - **TTL:** 24h default, configurable via `ttlMs`. Entries past TTL are
* evicted on access (lazy expiry — no background timer).
*
* - **Pluggable store:** in-memory default (Map-backed); `'disk'` writes to
* a JSON file at `.cache/v3-eval-cache.json` (overridable via `diskPath`)
* for cross-process fixture sharing; a custom `CacheMiddlewareStore` may be
* passed instead. Disk reads/writes are best-effort — IO failures fall
* through to the wrapped model.
*
* - **Env gate:** honors `process.env.V3_EVAL_CACHE`. When unset/empty, the
* middleware short-circuits to a passthrough — useful for production where
* cache hits would be incorrect but the operator wants the same wrap chain.
* Set to `'1'` (or any truthy non-empty string) to enable. The `enabled`
* option overrides the gate: `false` forces passthrough, `true` always caches.
*
* @packageDocumentation
*/
import type { LanguageModelV3CallOptions, LanguageModelV3GenerateResult, LanguageModelV3Middleware, LanguageModelV3StreamPart } from '@ai-sdk/provider';
/**
* Cached payload — both generate result and stream chunks under one key.
*
* Both payload fields are optional, so a single entry may carry a generate
* result, a stream-chunk list, or both.
*
* NOTE(review): this interface is not exported, yet the exported
* `CacheMiddlewareStore` signatures reference it — confirm whether custom
* store authors are expected to be able to name this type.
*/
interface CacheEntry {
/** Result captured from `doGenerate`. Absent if the entry came from a stream call. */
generateResult?: LanguageModelV3GenerateResult;
/**
* Stream chunks captured from `doStream` (replayed via simulateReadableStream).
* Optional — presumably absent when the entry came from a generate call.
*/
streamChunks?: LanguageModelV3StreamPart[];
/** Insert time in epoch milliseconds — drives TTL eviction. */
createdAt: number;
}
/**
* Pluggable cache store for cached LLM results.
*
* An implementation can be supplied as the `store` option of
* `CacheMiddlewareOptions` in place of the built-in `'memory'` / `'disk'`
* backends. All three methods are synchronous: none returns a Promise.
*
* NOTE(review): the entry type used below (`CacheEntry`) is declared in this
* module without `export` — confirm implementers can obtain it some other way.
*/
export interface CacheMiddlewareStore {
/** Look up the entry stored under `key`; returns `undefined` when there is none. */
get(key: string): CacheEntry | undefined;
/** Store `value` under `key`. */
set(key: string, value: CacheEntry): void;
/** Remove the entry stored under `key` (presumably used for TTL eviction — confirm). */
delete(key: string): void;
}
/**
* Options for {@link cacheMiddleware}.
*
* Every field is optional; omitted fields fall back to the defaults
* documented on each member.
*/
export interface CacheMiddlewareOptions {
/**
* Cache backend. `'memory'` uses a process-local Map; `'disk'` writes to
* `.cache/v3-eval-cache.json` for cross-process fixture sharing. A custom
* {@link CacheMiddlewareStore} can be passed instead.
*
* @default 'memory'
*/
store?: 'memory' | 'disk' | CacheMiddlewareStore;
/**
* Time-to-live in milliseconds. Entries older than `ttlMs` are evicted
* lazily, on access.
*
* @default 86_400_000 (24h)
*/
ttlMs?: number;
/**
* Custom hash function for cache keys. Receives the call options and the
* model id and returns the key string. Defaults to a stable hash of
* `{ prompt, modelId, responseFormat }`.
*/
keyHash?: (params: LanguageModelV3CallOptions, modelId: string) => string;
/**
* Optional override for the env gate. When `false`, the middleware acts
* as a passthrough regardless of `V3_EVAL_CACHE`. When `true`, always
* caches. When omitted, defaults to a truthy-check of
* `process.env.V3_EVAL_CACHE`.
*/
enabled?: boolean;
/**
* Optional custom path for the disk store (defaults to
* `.cache/v3-eval-cache.json`). Presumably only consulted when `store` is
* `'disk'` — confirm against the implementation.
*/
diskPath?: string;
}
/**
* Build a cache middleware for `wrapLanguageModel`. Wraps `doGenerate` and
* `doStream`; on a hit replays the cached payload, on a miss invokes the
* downstream model and stores the result.
*
* Composition note: install **before** budget/trace so cache hits don't
* pay the downstream model cost (the trace/budget middleware still see the
* payload via the wrapped result they observe in their own `wrapGenerate`).
*
* @param options - Optional configuration (store, TTL, key hash, env-gate
* override, disk path). May be omitted entirely, in which case the defaults
* documented on {@link CacheMiddlewareOptions} apply.
* @returns A `LanguageModelV3Middleware` suitable for the `middleware` field
* of `wrapLanguageModel`.
*
* @example
* ```ts
* import { wrapLanguageModel } from 'ai'
* import { openai } from '@ai-sdk/openai'
* import { cacheMiddleware } from 'ai-functions'
*
* const model = wrapLanguageModel({
* model: openai('gpt-4o'),
* middleware: cacheMiddleware({ store: 'disk', ttlMs: 86_400_000 }),
* })
* ```
*/
export declare function cacheMiddleware(options?: CacheMiddlewareOptions): LanguageModelV3Middleware;
export {};
//# sourceMappingURL=cache.d.ts.map