ai-functions
Version:
Core AI primitives for building intelligent applications
99 lines • 4.23 kB
TypeScript
/**
* embeddingCacheMiddleware — content-addressable cache for `wrapEmbeddingModel`
*
* Embedding-side analogue of {@link cacheMiddleware}. Wraps `doEmbed` and
* caches the resulting embeddings keyed on
* `{ values, modelId, providerOptions }` so a re-embed of the same value
* batch with the same model returns the cached vectors without hitting the
* provider.
*
* **Why a separate middleware instead of reusing `cacheMiddleware`?**
* AI SDK 6 splits language-model and embedding-model surfaces:
* `LanguageModelV3Middleware` exposes `wrapGenerate` / `wrapStream` against
* `LanguageModelV3CallOptions`, while `EmbeddingModelV3Middleware` exposes
* `wrapEmbed` against `EmbeddingModelV3CallOptions`. The cache shape
* (per-value vector vs. per-prompt completion payload) is also different —
* embeddings cache batched arrays, generations cache single result objects.
*
* - **Hit derivation:** stable hash of `{ values, modelId, providerOptions }`.
* `values` is the array as-passed (callers can pre-normalise if they want
* case/whitespace insensitivity). Generation knobs don't apply to
* embeddings, so the default key includes nothing else.
*
* - **Batch semantics:** the cache key is the *whole* batch. A subset hit
* doesn't trigger a partial-fill — that's a more invasive shape change
* (the legacy `EmbeddingCache.getMany` did per-text caching, but it was
* only used in the example and added 100+ LOC of bookkeeping). Callers
* that want per-text caching should use stable per-text batches.
*
* - **TTL:** 24h default, configurable. Lazy expiry on access.
*
* - **Pluggable store:** in-memory default (Map-backed); custom store
* honored as-is. Disk persistence is intentionally not provided here —
* embedding payloads (large `number[][]`) make on-disk JSON a bad fit;
* callers who want it should pass a custom store.
*
* - **Env gate:** honors `process.env.V3_EVAL_CACHE` for parity with
* `cacheMiddleware`. Override via the `enabled` option.
*
* @packageDocumentation
*/
import type { EmbeddingModelV3CallOptions, EmbeddingModelV3Embedding, EmbeddingModelV3Middleware, SharedV3Warning } from '@ai-sdk/provider';
/**
 * Shape of a single cached embedding result, as held by an
 * `EmbedCacheMiddlewareStore`.
 *
 * Not exported from this module; it is only reachable through the store
 * interface's method signatures.
 */
interface EmbedCacheEntry {
    /** Embedding vectors returned for the cached batch. */
    embeddings: EmbeddingModelV3Embedding[];
    /** Provider warnings stored alongside the cached batch. */
    warnings: SharedV3Warning[];
    /** Insertion time in epoch milliseconds; drives TTL eviction. */
    createdAt: number;
}
/**
* Pluggable cache store for embedding results.
*
* All three methods are synchronous (none returns a promise) and address
* entries by a string cache key.
*
* NOTE(review): `EmbedCacheEntry` is declared without `export` in this
* module, so custom store implementations cannot import the entry type by
* name — presumably they rely on structural typing; confirm this is intended.
*/
export interface EmbedCacheMiddlewareStore {
/** Look up the entry stored under `key`; `undefined` signals a miss. */
get(key: string): EmbedCacheEntry | undefined;
/** Store `value` under `key`. */
set(key: string, value: EmbedCacheEntry): void;
/** Remove the entry stored under `key`. */
delete(key: string): void;
}
/**
* Options for {@link embeddingCacheMiddleware}.
*
* Every field is optional; the documented defaults apply when a field is
* omitted.
*/
export interface EmbedCacheMiddlewareOptions {
/**
* Cache backend. `'memory'` uses a process-local Map. A custom
* {@link EmbedCacheMiddlewareStore} can be passed instead.
*
* @default 'memory'
*/
store?: 'memory' | EmbedCacheMiddlewareStore;
/**
* Time-to-live in milliseconds. Entries older than `ttlMs` are evicted
* lazily, when they are next accessed.
*
* @default 86_400_000 (24h)
*/
ttlMs?: number;
/**
* Custom hash function for cache keys. Receives the embed call options
* and the model ID and returns the string key used with the store.
* Defaults to a stable hash of `{ values, modelId, providerOptions }`.
*/
keyHash?: (params: EmbeddingModelV3CallOptions, modelId: string) => string;
/**
* Optional override for the env gate. When `false`, the middleware acts
* as a passthrough regardless of `V3_EVAL_CACHE`. When `true`, always
* caches. When omitted, falls back to a truthy-check of
* `process.env.V3_EVAL_CACHE`.
*/
enabled?: boolean;
}
/**
* Build an embedding-cache middleware for `wrapEmbeddingModel`.
*
* The cache key covers the whole value batch (by default a stable hash of
* `{ values, modelId, providerOptions }`), so a repeat embed of the same
* batch with the same model is served from the store instead of the provider.
*
* @param options - Optional store, TTL, key-hash and enable/disable
* settings; see {@link EmbedCacheMiddlewareOptions} for the defaults.
* @returns An `EmbeddingModelV3Middleware` to pass as the `middleware`
* option of `wrapEmbeddingModel`.
*
* @example
* ```ts
* import { wrapEmbeddingModel } from 'ai'
* import { embeddingCacheMiddleware } from 'ai-functions'
*
* const model = wrapEmbeddingModel({
* model: openai.embedding('text-embedding-3-small'),
* middleware: embeddingCacheMiddleware({ ttlMs: 86_400_000 }),
* })
* ```
*/
export declare function embeddingCacheMiddleware(options?: EmbedCacheMiddlewareOptions): EmbeddingModelV3Middleware;
export {};
//# sourceMappingURL=embed-cache.d.ts.map