UNPKG

ai-functions

Version:

Core AI primitives for building intelligent applications

128 lines 5.12 kB
/**
 * embeddingCacheMiddleware — content-addressable cache for `wrapEmbeddingModel`
 *
 * Embedding-side analogue of {@link cacheMiddleware}. Wraps `doEmbed` and
 * caches the resulting embeddings keyed on
 * `{ values, modelId, providerOptions }` so a re-embed of the same value
 * batch with the same model returns the cached vectors without hitting the
 * provider.
 *
 * **Why a separate middleware instead of reusing `cacheMiddleware`?**
 * AI SDK 6 splits language-model and embedding-model surfaces:
 * `LanguageModelV3Middleware` exposes `wrapGenerate` / `wrapStream` against
 * `LanguageModelV3CallOptions`, while `EmbeddingModelV3Middleware` exposes
 * `wrapEmbed` against `EmbeddingModelV3CallOptions`. The cache shape
 * (per-value vector vs. per-prompt completion payload) is also different —
 * embeddings cache batched arrays, generations cache single result objects.
 *
 * - **Hit derivation:** stable hash of `{ values, modelId, providerOptions }`.
 *   `values` is the array as-passed (caller can pre-normalise if they want
 *   case/whitespace insensitivity). Generation knobs don't apply.
 *
 * - **Batch semantics:** the cache key is the *whole* batch. A subset hit
 *   doesn't trigger a partial-fill — that's a more invasive shape change
 *   (the legacy `EmbeddingCache.getMany` did per-text caching, but it was
 *   only used in the example and added 100+ LOC of bookkeeping). Callers
 *   that want per-text caching should use stable per-text batches.
 *
 * - **TTL:** 24h default, configurable. Lazy expiry on access.
 *
 * - **Pluggable store:** in-memory default (Map-backed); custom store
 *   honored as-is. A custom store's `get` / `set` / `delete` may be
 *   synchronous or return promises (each call is awaited), and a miss may be
 *   reported as either `undefined` or `null`. Disk persistence is
 *   intentionally not provided here — embedding payloads (large `number[][]`)
 *   make on-disk JSON a bad fit; callers who want it should pass a custom
 *   store.
 *
 * - **Env gate:** honors `process.env.V3_EVAL_CACHE` for parity with
 *   `cacheMiddleware`. Override via the `enabled` option. When no `process`
 *   global exists the gate reads as "off" instead of throwing.
 *
 * @packageDocumentation
 */
import { hashKey } from '../cache.js';

// ============================================================================
// Stores
// ============================================================================

/**
 * Default store: a thin wrapper around a `Map`.
 *
 * Nothing is evicted on its own; an entry leaves the map only through an
 * explicit `delete` (the middleware issues one when it reads an expired entry).
 */
class MemoryStore {
  map = new Map();

  /** @returns the stored entry, or `undefined` when the key is absent. */
  get(key) {
    return this.map.get(key);
  }

  /** Insert or overwrite the entry stored under `key`. */
  set(key, value) {
    this.map.set(key, value);
  }

  /** Remove the entry stored under `key`, if any. */
  delete(key) {
    this.map.delete(key);
  }
}

// ============================================================================
// Helpers
// ============================================================================

/** Default entry lifetime: 24 hours, expressed in milliseconds. */
const DEFAULT_TTL_MS = 24 * 60 * 60 * 1000;

/**
 * Default cache-key derivation.
 *
 * @param params - embed call options; only `values` and `providerOptions` are read.
 * @param modelId - identifier of the wrapped embedding model.
 * @returns whatever `hashKey` produces for `{ values, modelId, providerOptions }`.
 */
function defaultKeyHash(params, modelId) {
  return hashKey({
    values: params.values,
    modelId,
    providerOptions: params.providerOptions,
  });
}

/**
 * Read the `V3_EVAL_CACHE` environment gate.
 *
 * Any non-empty string turns the gate on — the value itself is not
 * interpreted, so `"0"` and `"false"` also enable the cache.
 *
 * @returns `true` when the variable is set to a non-empty string; `false`
 *   otherwise, including when there is no `process` global to read from.
 */
function envGateEnabled() {
  // Guard the lookup: referencing a missing `process` global would throw a
  // ReferenceError while the middleware is being constructed.
  const env = typeof process === 'undefined' ? undefined : process.env;
  const v = env?.['V3_EVAL_CACHE'];
  return typeof v === 'string' && v.length > 0;
}

/**
 * Whether a cache entry has outlived its TTL.
 *
 * @param entry - cache entry; only its `createdAt` timestamp (ms since epoch) is read.
 * @param ttlMs - maximum entry age in milliseconds.
 * @returns `true` when strictly more than `ttlMs` ms have passed since `createdAt`.
 */
function isExpired(entry, ttlMs) {
  return Date.now() - entry.createdAt > ttlMs;
}

// ============================================================================
// Middleware
// ============================================================================

/**
 * Build an embedding-cache middleware for `wrapEmbeddingModel`.
 *
 * @param options - optional settings:
 *   - `ttlMs`: entry lifetime in milliseconds (defaults to 24h).
 *   - `keyHash`: `(params, modelId) => key`, replacing the default key derivation.
 *   - `store`: `'memory'` / omitted for a fresh in-memory store, or an object
 *     with `get(key)`, `set(key, entry)` and `delete(key)`; those methods may
 *     return promises.
 *   - `enabled`: explicit on/off switch; when omitted, the `V3_EVAL_CACHE`
 *     environment gate decides.
 * @returns a middleware object (`specificationVersion: 'v3'`) whose `wrapEmbed`
 *   replays cached `{ embeddings, warnings }` on a fresh hit and otherwise
 *   calls `doEmbed()`, stores the outcome and returns the provider result.
 *
 * @example
 * ```ts
 * import { wrapEmbeddingModel } from 'ai'
 * import { embeddingCacheMiddleware } from 'ai-functions'
 *
 * const model = wrapEmbeddingModel({
 *   model: openai.embedding('text-embedding-3-small'),
 *   middleware: embeddingCacheMiddleware({ ttlMs: 86_400_000 }),
 * })
 * ```
 */
export function embeddingCacheMiddleware(options = {}) {
  const ttlMs = options.ttlMs ?? DEFAULT_TTL_MS;
  const keyHash = options.keyHash ?? defaultKeyHash;
  const store =
    options.store === undefined || options.store === 'memory'
      ? new MemoryStore()
      : options.store;
  // Resolved once at construction time; later changes to the environment
  // variable do not affect an already-built middleware.
  const enabled = options.enabled ?? envGateEnabled();

  return {
    specificationVersion: 'v3',
    async wrapEmbed({ doEmbed, params, model }) {
      if (!enabled) return doEmbed();

      const key = keyHash(params, model.modelId);

      // Awaiting is a no-op for synchronous stores and lets promise-returning
      // stores work. `!= null` treats both `undefined` and `null` as a miss.
      const cached = await store.get(key);
      if (cached != null) {
        if (!isExpired(cached, ttlMs)) {
          // Replay shape matches EmbeddingModelV3Result. Provider-side
          // metadata (response headers, body, usage) is intentionally absent
          // on a hit — callers reading those should disable the cache.
          return {
            embeddings: cached.embeddings,
            warnings: cached.warnings,
          };
        }
        // Lazy expiry: drop the stale entry, then fall through to re-embed.
        await store.delete(key);
      }

      const result = await doEmbed();
      await store.set(key, {
        embeddings: result.embeddings,
        warnings: result.warnings,
        createdAt: Date.now(),
      });
      return result;
    },
  };
}
//# sourceMappingURL=embed-cache.js.map