UNPKG

ai-functions

Version:

Core AI primitives for building intelligent applications

176 lines 6.79 kB
/**
 * traceMiddleware — emit per-call trace events for `wrapLanguageModel`
 *
 * Wraps `doGenerate` / `doStream` and emits a {@link TraceEvent} on every
 * completion. The sink is opaque (caller supplies `emit`) so this primitive
 * works equally well piping into:
 *
 *  - the v3 cascade-walker InvocationEvent stream (round 16+ work to add
 *    `'persona-trace'` / `'cascade-trace'` to the union),
 *  - an {@link import('../eval-log/index.js').EvalLogStore} for fixture
 *    replay,
 *  - OpenTelemetry / Datadog / Honeycomb adapters that map the event into
 *    a span.
 *
 * **Emit-error tolerance:** if the supplied `emit` throws, we *swallow* the
 * error (with a one-time `console.warn`) so a flaky trace sink can never
 * break the wrapped LLM call. This matches the Evalite v0.19 trace
 * middleware behaviour. The same tolerance covers building the event
 * itself: a throwing `getCostUsd` resolver only drops the `costUsd` field,
 * and any other failure while assembling the event drops that one event.
 *
 * Composition note: install **last** so the event sees the final outcome
 * (post-cache, post-budget). The event's `costUsd` field is best-effort —
 * the trace middleware doesn't have direct access to the budget tracker, so
 * the caller can pass a `getCostUsd` resolver if they want costs in the
 * event payload.
 *
 * @packageDocumentation
 */

// ============================================================================
// Helpers
// ============================================================================

/**
 * Flatten the structured V3 prompt into a single string for cheap storage.
 * Walks the messages in `params.prompt` and concatenates their text parts.
 * Non-text parts are replaced by a short `[type]` marker so the trace
 * doesn't grow unboundedly.
 *
 * @param {{ prompt: Iterable<{ role: string, content: unknown }> }} params
 *   Call parameters carrying the prompt messages.
 * @returns {string} One `[role] text` line per message, joined with `\n`.
 *   Messages whose content is neither a string nor an array are skipped.
 */
function stringifyPrompt(params) {
  const lines = [];
  for (const msg of params.prompt) {
    if (msg.role === 'system') {
      lines.push(`[system] ${msg.content}`);
      continue;
    }
    if (typeof msg.content === 'string') {
      lines.push(`[${msg.role}] ${msg.content}`);
      continue;
    }
    if (Array.isArray(msg.content)) {
      const parts = msg.content.map((part) =>
        part.type === 'text' ? part.text : `[${part.type}]`,
      );
      lines.push(`[${msg.role}] ${parts.join(' ')}`);
    }
  }
  return lines.join('\n');
}

/**
 * Flatten the V3 generate result content into a single string. Text parts
 * are kept verbatim, reasoning parts are prefixed with `[reasoning] `, and
 * every other part is replaced by a `[type]` marker.
 *
 * @param {Iterable<{ type: string, text?: string }>} content
 *   The `content` array of a generate result.
 * @returns {string} The parts concatenated without a separator.
 */
function stringifyContent(content) {
  const parts = [];
  for (const part of content) {
    if (part.type === 'text') {
      parts.push(part.text);
    } else if (part.type === 'reasoning') {
      parts.push(`[reasoning] ${part.text}`);
    } else {
      parts.push(`[${part.type}]`);
    }
  }
  return parts.join('');
}

// Failure scopes that have already produced a warning. Module-level, so each
// scope warns once per process rather than once per middleware instance.
const warnedScopes = new Set();

/**
 * Log a `console.warn` for `scope` the first time it fails; later failures
 * in the same scope are silent.
 *
 * @param {string} scope Failure category (`'emit'`, `'cost'`, `'build'`).
 * @param {string} message Human-readable description placed before the
 *   error detail.
 * @param {unknown} err The caught value.
 */
function warnOnce(scope, message, err) {
  if (warnedScopes.has(scope)) return;
  warnedScopes.add(scope);
  const detail = err instanceof Error ? err.message : String(err);
  // eslint-disable-next-line no-console
  console.warn(`[ai-functions/traceMiddleware] ${message} ${detail}`);
}

/**
 * Call `emit(event)` and await it, swallowing any synchronous throw or
 * rejection (one-time warning).
 *
 * @param {(event: object) => unknown} emit Caller-supplied sink.
 * @param {object} event The trace event to deliver.
 * @returns {Promise<void>} Never rejects because of `emit`.
 */
async function safeEmit(emit, event) {
  try {
    await emit(event);
  } catch (err) {
    warnOnce('emit', 'emit() threw — subsequent emit errors will be silenced.', err);
  }
}

/**
 * Resolve the optional `costUsd` field as a spreadable object fragment.
 * A throwing resolver must not cost us the whole event (the field is
 * best-effort), so on failure the field is simply omitted.
 *
 * @param {((modelId: string, usage: unknown) => unknown) | undefined} getCostUsd
 * @param {string} modelId
 * @param {unknown} usage
 * @returns {{ costUsd?: unknown }} `{ costUsd }` when the resolver is set
 *   and returns, otherwise `{}`.
 */
function resolveCostUsd(getCostUsd, modelId, usage) {
  if (getCostUsd === undefined) return {};
  try {
    return { costUsd: getCostUsd(modelId, usage) };
  } catch (err) {
    warnOnce(
      'cost',
      'getCostUsd() threw — costUsd omitted; subsequent cost errors will be silenced.',
      err,
    );
    return {};
  }
}

/**
 * Build a trace event and hand it to `emit`, guarding both steps so that
 * tracing can never fail the wrapped call. `buildEvent` and `emit` are both
 * invoked synchronously, before the first `await`.
 *
 * @param {(event: object) => unknown} emit Caller-supplied sink.
 * @param {() => object} buildEvent Thunk that assembles the event.
 * @returns {Promise<void>} Never rejects because of tracing.
 */
async function safeTrace(emit, buildEvent) {
  let event;
  try {
    event = buildEvent();
  } catch (err) {
    warnOnce(
      'build',
      'failed to build trace event — event dropped; subsequent build errors will be silenced.',
      err,
    );
    return;
  }
  await safeEmit(emit, event);
}

// ============================================================================
// Middleware
// ============================================================================

/**
 * Build a trace middleware for `wrapLanguageModel`. Emits a
 * {@link TraceEvent} on every successful `doGenerate` / `doStream`
 * completion. Errors from `emit`, from `getCostUsd`, or from assembling the
 * event are swallowed (one-time warn each) so tracing can never break the
 * wrapped LLM call.
 *
 * @param {object} options
 * @param {(event: object) => unknown} options.emit Sink receiving each
 *   event; may be async.
 * @param {string} [options.kind='eval-trace'] Value of the event's `kind`.
 * @param {(modelId: string, usage: unknown) => unknown} [options.getCostUsd]
 *   Optional resolver; when set, its return value becomes `costUsd`.
 * @param {unknown} [options.tags] Copied onto the event when defined.
 * @returns {object} A middleware with `specificationVersion: 'v3'`,
 *   `wrapGenerate` and `wrapStream`.
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { traceMiddleware, getEvalLogStore } from 'ai-functions'
 *
 * const store = getEvalLogStore()
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: traceMiddleware({
 *     kind: 'cascade-trace',
 *     emit: (event) => store.record({ ...event, costUsd: event.costUsd ?? 0 }),
 *   }),
 * })
 * ```
 */
export function traceMiddleware(options) {
  const { emit, kind = 'eval-trace', getCostUsd, tags } = options;

  // Shared event assembly. `readResponse` is a thunk so that flattening the
  // response happens inside the guarded build, after the prompt is flattened.
  const buildEvent = ({ params, modelId, readResponse, usage, durationMs }) => ({
    kind,
    model: modelId,
    prompt: stringifyPrompt(params),
    response: readResponse(),
    usage,
    durationMs,
    ...resolveCostUsd(getCostUsd, modelId, usage),
    ...(tags !== undefined ? { tags } : {}),
  });

  return {
    specificationVersion: 'v3',

    async wrapGenerate({ doGenerate, params, model }) {
      const start = Date.now();
      // Failures of the model call itself propagate untouched; only
      // successful completions are traced.
      const result = await doGenerate();
      const durationMs = Date.now() - start;
      const modelId = model.modelId;
      await safeTrace(emit, () =>
        buildEvent({
          params,
          modelId,
          readResponse: () => stringifyContent(result.content),
          usage: result.usage,
          durationMs,
        }),
      );
      return result;
    },

    async wrapStream({ doStream, params, model }) {
      const start = Date.now();
      const result = await doStream();
      const modelId = model.modelId;
      let finalUsage;
      const collected = [];
      const transformedStream = result.stream.pipeThrough(
        new TransformStream({
          transform(chunk, controller) {
            // Observe only; every chunk is forwarded unchanged.
            if (chunk.type === 'text-delta') collected.push(chunk.delta);
            else if (chunk.type === 'finish') finalUsage = chunk.usage;
            controller.enqueue(chunk);
          },
          flush() {
            const durationMs = Date.now() - start;
            // Fire-and-forget — flush() may return a promise that the
            // stream would wait on, so we deliberately don't return or
            // await this one: a slow sink must not delay stream close.
            // safeTrace never rejects, so the promise cannot float as an
            // unhandled rejection.
            void safeTrace(emit, () =>
              buildEvent({
                params,
                modelId,
                readResponse: () => collected.join(''),
                usage: finalUsage,
                durationMs,
              }),
            );
          },
        }),
      );
      return {
        ...result,
        stream: transformedStream,
      };
    },
  };
}
//# sourceMappingURL=trace.js.map