ai-functions
Version:
Core AI primitives for building intelligent applications
228 lines • 8.61 kB
JavaScript
/**
* cacheMiddleware — content-addressable cache for `wrapLanguageModel`
*
* Implements the AI SDK cookbook's local-caching-middleware pattern
* (https://ai-sdk.dev/cookbook/node/local-caching-middleware) on top of the
* AI SDK 6 `LanguageModelV3Middleware` shape:
*
* - **Hit derivation:** content-hash of `{ prompt, modelId, responseFormat }`
* so a schema change (responseFormat.type === 'json' carries a `schema`
* JSONSchema7) invalidates the entry. Generation parameters (temperature,
* topP, etc.) are deliberately *not* part of the key for the eval-fixture
* use case — flipping temperature shouldn't blow up a 5x verify-time win.
* Callers who want strict keying should pass a custom `keyHash`.
*
* - **Stream support:** cached entries store the `LanguageModelV3StreamPart[]`
* array; `wrapStream` replays them via `simulateReadableStream` so consumers
* see the same chunked event sequence on a hit. (`wrapGenerate` is the
* common path; both share the same cache map.)
*
* - **TTL:** 24h default, configurable via `ttlMs`. Entries past TTL are
* evicted on access (lazy expiry — no background timer).
*
* - **Pluggable store:** in-memory default (Map-backed); `'disk'` writes to
* a JSON file at `.cache/v3-eval-cache.json` for cross-process fixture
* sharing. Disk reads/writes are best-effort — IO failures fall through
* to the wrapped model.
*
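* - **Env gate:** honors `process.env.V3_EVAL_CACHE`. When unset/empty, the
* middleware short-circuits to a passthrough — useful for production where
* cache hits would be incorrect but the operator wants the same wrap chain.
* Set to `'1'` to enable; any non-empty string counts, so `'0'` and
* `'false'` also enable it. The variable is read once, when
* `cacheMiddleware()` is called, and an explicit `enabled` option overrides it.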
*
* @packageDocumentation
*/
import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { dirname } from 'node:path';
import { simulateReadableStream } from 'ai';
import { hashKey } from '../cache.js';
// ============================================================================
// Stores
// ============================================================================
/**
 * In-process cache store: a thin wrapper over a `Map` exposing the
 * `get` / `set` / `delete` trio the middleware expects from a store.
 * Nothing is persisted; entries live only as long as this instance.
 */
class MemoryStore {
    map;
    constructor() {
        this.map = new Map();
    }
    /** Return the entry stored under `key`, or `undefined` when absent. */
    get(key) {
        const { map } = this;
        return map.get(key);
    }
    /** Store `value` under `key`, replacing any previous entry. */
    set(key, value) {
        const { map } = this;
        map.set(key, value);
    }
    /** Remove the entry stored under `key`, if any. */
    delete(key) {
        const { map } = this;
        map.delete(key);
    }
}
/**
 * Disk-backed store. Best-effort — read, parse and write errors are
 * swallowed so a missing or corrupt cache file never blocks an LLM call.
 * The file is read once (on first access) into an in-memory map; the whole
 * map is rewritten to disk on every `set` and `delete` (cheap for the
 * eval-fixture use case, which is dominated by reads).
 *
 * Filesystem access goes through the statically imported `node:fs` /
 * `node:path` bindings. This file is an ES module, where a bare `require`
 * is not defined; calling it threw inside the best-effort `try` blocks and
 * silently turned the disk store into a non-persistent one.
 */
class DiskStore {
    path;
    cache = null;
    /**
     * @param path Location of the JSON cache file.
     */
    constructor(path) {
        this.path = path;
    }
    /**
     * Return the in-memory map, populating it from disk on first use.
     * Any failure (missing file, unreadable file, invalid JSON) leaves the
     * map empty.
     */
    load() {
        if (this.cache !== null) {
            return this.cache;
        }
        this.cache = new Map();
        try {
            const parsed = JSON.parse(readFileSync(this.path, 'utf-8'));
            // Only a plain JSON object is a valid key → entry table; an
            // array, string, number or null payload is ignored.
            if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
                for (const [key, entry] of Object.entries(parsed)) {
                    this.cache.set(key, entry);
                }
            }
        }
        catch {
            // best-effort: start from an empty cache
        }
        return this.cache;
    }
    /**
     * Serialize the whole map to the cache file, creating the parent
     * directory when needed. Does nothing if the map was never loaded.
     */
    flush() {
        if (this.cache === null) {
            return;
        }
        try {
            // `recursive: true` makes this a no-op when the directory exists.
            mkdirSync(dirname(this.path), { recursive: true });
            const serialized = JSON.stringify(Object.fromEntries(this.cache));
            writeFileSync(this.path, serialized, 'utf-8');
        }
        catch {
            // best-effort: the in-memory map still serves this process
        }
    }
    /** Return the entry stored under `key`, or `undefined` when absent. */
    get(key) {
        return this.load().get(key);
    }
    /** Store `value` under `key` and rewrite the cache file. */
    set(key, value) {
        this.load().set(key, value);
        this.flush();
    }
    /** Remove `key` and rewrite the cache file. */
    delete(key) {
        this.load().delete(key);
        this.flush();
    }
}
// ============================================================================
// Helpers
// ============================================================================
// Default entry lifetime when the caller passes no `ttlMs`: 24 hours, in ms.
const DEFAULT_TTL_MS = 1000 * 60 * 60 * 24;
/**
 * Default cache-key derivation: hands `hashKey` an object holding the
 * prompt, the model id and the response format (in that order).
 *
 * No other field of `params` is read, so generation settings such as
 * temperature do not influence the key — the eval-fixture cache is meant to
 * survive those tweaks.
 *
 * @param params Call parameters; only `prompt` and `responseFormat` are used.
 * @param modelId Identifier of the model being called.
 * @returns Whatever `hashKey` returns for the key material.
 */
function defaultKeyHash(params, modelId) {
    const { prompt, responseFormat } = params;
    const keyMaterial = { prompt, modelId, responseFormat };
    return hashKey(keyMaterial);
}
/**
 * Read the `V3_EVAL_CACHE` environment gate.
 *
 * @returns `true` when the variable is set to a non-empty string (any
 * content, including `'0'`), `false` when it is unset or empty.
 */
function envGateEnabled() {
    const { V3_EVAL_CACHE: flag } = process.env;
    if (typeof flag !== 'string') {
        return false;
    }
    return flag !== '';
}
/**
 * Report whether a cache entry has outlived its TTL.
 *
 * An entry whose `createdAt` is missing or not a finite number (for example
 * one read back from a damaged or hand-edited cache file), or an entry that
 * is `null`/`undefined`, is reported as expired: its age cannot be
 * determined, and the plain subtraction would yield `NaN`, which never
 * compares greater than the TTL and so would keep the entry alive forever.
 *
 * @param entry Cache entry carrying a `createdAt` epoch-millisecond stamp.
 * @param ttlMs Maximum entry age in milliseconds.
 * @returns `true` when the entry is older than `ttlMs` or has no usable
 * timestamp, `false` otherwise.
 */
function isExpired(entry, ttlMs) {
    const createdAt = entry?.createdAt;
    if (!Number.isFinite(createdAt)) {
        return true;
    }
    return Date.now() - createdAt > ttlMs;
}
// ============================================================================
// Middleware
// ============================================================================
/**
 * Restore `response.timestamp` on a cached generate result. After a JSON
 * round trip (disk store) the timestamp is a string; it is turned back into
 * a `Date`. Results that need no change are returned as-is.
 */
function reviveGenerateResult(result) {
    const timestamp = result?.response?.timestamp;
    if (typeof timestamp !== 'string') {
        return result;
    }
    return {
        ...result,
        response: { ...result.response, timestamp: new Date(timestamp) },
    };
}
/**
 * Restore the `timestamp` of a cached `response-metadata` stream part that
 * was stringified by a JSON round trip. Other parts are returned as-is.
 */
function reviveStreamPart(part) {
    if (part?.type === 'response-metadata' && typeof part.timestamp === 'string') {
        return { ...part, timestamp: new Date(part.timestamp) };
    }
    return part;
}
/**
 * Build a cache middleware for `wrapLanguageModel`. Wraps `doGenerate` and
 * `doStream`; on a hit replays the cached payload, on a miss invokes the
 * downstream model and stores the result.
 *
 * A cache entry can hold both a generate result and a list of stream chunks
 * under the same key: writing one payload keeps the other, so alternating
 * generate and stream calls for the same prompt do not evict each other.
 * When a payload is added to an entry that is still fresh, the entry keeps
 * its original `createdAt`, so no payload is served past the TTL.
 *
 * Streams that contain an `error` part are forwarded to the caller but not
 * cached, so a failed call is retried on the next request instead of being
 * replayed until the entry expires.
 *
 * Composition note: install **before** budget/trace so cache hits don't
 * pay the downstream model cost (the trace/budget middleware still see the
 * payload via the wrapped result they observe in their own `wrapGenerate`).
 *
 * @param options Optional settings:
 * - `ttlMs` — maximum entry age in milliseconds (defaults to `DEFAULT_TTL_MS`).
 * - `keyHash` — `(params, modelId) => key`; defaults to `defaultKeyHash`.
 * - `store` — `'memory'` (default), `'disk'`, or an object exposing
 *   `get(key)`, `set(key, value)` and `delete(key)`.
 * - `diskPath` — cache file for the `'disk'` store
 *   (defaults to `.cache/v3-eval-cache.json`).
 * - `enabled` — explicit on/off switch; when omitted, `envGateEnabled()` is
 *   consulted once, at construction time.
 * @returns A middleware object with `specificationVersion: 'v3'`,
 * `wrapGenerate` and `wrapStream`.
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { cacheMiddleware } from 'ai-functions'
 *
 * const model = wrapLanguageModel({
 * model: openai('gpt-4o'),
 * middleware: cacheMiddleware({ store: 'disk', ttlMs: 86_400_000 }),
 * })
 * ```
 */
export function cacheMiddleware(options = {}) {
    const ttlMs = options.ttlMs ?? DEFAULT_TTL_MS;
    const keyHash = options.keyHash ?? defaultKeyHash;
    const store = options.store === undefined || options.store === 'memory'
        ? new MemoryStore()
        : options.store === 'disk'
            ? new DiskStore(options.diskPath ?? '.cache/v3-eval-cache.json')
            : options.store;
    const enabled = options.enabled ?? envGateEnabled();
    // Look up `key`, lazily evicting the entry when it is past its TTL.
    // Returns the entry only when it is present and still fresh.
    const readFresh = (key) => {
        const entry = store.get(key);
        if (entry === undefined || entry === null) {
            return undefined;
        }
        if (isExpired(entry, ttlMs)) {
            store.delete(key);
            return undefined;
        }
        return entry;
    };
    // Store `payload` under `key`, merging into a still-fresh entry so the
    // generate and stream payloads can coexist. The store is re-read at
    // write time because the other payload may have been added while the
    // model call was in flight.
    const writePayload = (key, payload) => {
        const prior = store.get(key);
        const base = prior != null && !isExpired(prior, ttlMs) ? prior : undefined;
        store.set(key, {
            ...base,
            ...payload,
            createdAt: base?.createdAt ?? Date.now(),
        });
    };
    return {
        specificationVersion: 'v3',
        async wrapGenerate({ doGenerate, params, model }) {
            if (!enabled) {
                return doGenerate();
            }
            const key = keyHash(params, model.modelId);
            const cachedResult = readFresh(key)?.generateResult;
            if (cachedResult !== undefined) {
                return reviveGenerateResult(cachedResult);
            }
            const result = await doGenerate();
            writePayload(key, { generateResult: result });
            return result;
        },
        async wrapStream({ doStream, params, model }) {
            if (!enabled) {
                return doStream();
            }
            const key = keyHash(params, model.modelId);
            const cachedChunks = readFresh(key)?.streamChunks;
            if (Array.isArray(cachedChunks)) {
                // Replay cached chunks via simulateReadableStream so consumers
                // see the same async iteration shape as a fresh call.
                return {
                    stream: simulateReadableStream({
                        chunks: cachedChunks.map(reviveStreamPart),
                        initialDelayInMs: 0,
                        chunkDelayInMs: 0,
                    }),
                };
            }
            const result = await doStream();
            // Tee the stream: forward to caller, accumulate for cache.
            const chunks = [];
            let sawError = false;
            const teedStream = result.stream.pipeThrough(new TransformStream({
                transform(chunk, controller) {
                    if (chunk?.type === 'error') {
                        sawError = true;
                    }
                    chunks.push(chunk);
                    controller.enqueue(chunk);
                },
                // Runs only when the source stream closes normally; an
                // aborted or cancelled stream never reaches the cache.
                flush() {
                    if (!sawError) {
                        writePayload(key, { streamChunks: chunks });
                    }
                },
            }));
            return {
                ...result,
                stream: teedStream,
            };
        },
    };
}
//# sourceMappingURL=cache.js.map