ai-functions
Version:
Core AI primitives for building intelligent applications
249 lines (236 loc) • 8.96 kB
text/typescript
/**
* traceMiddleware — emit per-call trace events for `wrapLanguageModel`
*
* Wraps `doGenerate` / `doStream` and emits a {@link TraceEvent} on every
* completion. The sink is opaque (caller supplies `emit`) so this primitive
* works equally well piping into:
*
* - the v3 cascade-walker InvocationEvent stream (round 16+ work to add
* `'persona-trace'` / `'cascade-trace'` to the union),
* - an {@link import('../eval-log/index.js').EvalLogStore} for fixture
* replay,
* - OpenTelemetry / Datadog / Honeycomb adapters that map the event into
* a span.
*
* **Emit-error tolerance:** if the supplied `emit` throws, we *swallow* the
* error (with a one-time `console.warn`) so a flaky trace sink can never
* break the wrapped LLM call. This matches the Evalite v0.19 trace
* middleware behaviour.
*
* Composition note: install **last** so the event sees the final outcome
* (post-cache, post-budget). The event's `costUsd` field is best-effort —
* the trace middleware doesn't have direct access to the budget tracker, so
* the caller can pass a `getCostUsd` resolver if they want costs in the
* event payload.
*
* @packageDocumentation
*/
import type {
LanguageModelV3CallOptions,
LanguageModelV3GenerateResult,
LanguageModelV3Middleware,
LanguageModelV3StreamPart,
LanguageModelV3StreamResult,
LanguageModelV3Usage,
} from '@ai-sdk/provider'
// ============================================================================
// Types
// ============================================================================
/**
 * Discriminator for the originating call site. Callers inject this via the
 * `kind` option so a single sink can fan events into different downstream
 * streams (persona panel vs. cascade walker vs. ad-hoc test).
 *
 * The three literals are the well-known kinds; any other string is accepted
 * too. The open-ended member is spelled `string & Record<never, never>`
 * instead of a bare `string` because a bare `string` absorbs the literals
 * (the whole union collapses to `string`), which hides the well-known kinds
 * from editor completion. Assignability is unchanged: every `string` is a
 * valid `TraceEventKind`, and a `TraceEventKind` is usable wherever a
 * `string` is expected.
 */
export type TraceEventKind =
  | 'persona-trace'
  | 'cascade-trace'
  | 'eval-trace'
  | (string & Record<never, never>)
/**
* Trace event payload emitted on every wrapped call completion.
*
* Field design notes:
* - `prompt` / `response` are stringified for cheap downstream storage
* (the structured `LanguageModelV3Prompt` / `LanguageModelV3Content[]`
* shapes are intentionally flattened).
* - `usage` is the raw V3 shape (with the cache breakdown) — the
* EvalLogStore consumer flattens it into total counts.
* - `costUsd` is optional because the trace middleware doesn't compute
* cost itself; callers either pass a resolver or compute downstream
* from `usage`.
*/
export interface TraceEvent {
/** Call-site discriminator, taken from the middleware's `kind` option. */
kind: TraceEventKind
/** `modelId` of the wrapped model. */
model: string
/** Flattened prompt text: one `[role] …` line per prompt message. */
prompt: string
/**
* Flattened response text. For `doGenerate` this covers every content part
* (text verbatim, reasoning prefixed with `[reasoning] `, other parts as a
* `[type]` marker); for `doStream` only the text deltas are collected.
*/
response: string
/**
* Usage as reported by the model. `undefined` for a streamed call whose
* stream closed without carrying a `finish` part.
*/
usage: LanguageModelV3Usage | undefined
/**
* Value returned by the caller's `getCostUsd` resolver. Absent when no
* resolver was supplied.
*/
costUsd?: number
/**
* Elapsed wall-clock milliseconds (measured with `Date.now()`), from just
* before the model call is started until `doGenerate` resolves or, for
* streams, until the stream is flushed.
*/
durationMs: number
/** Optional caller-supplied tags for downstream filtering. */
tags?: Record<string, string>
}
/** Options for {@link traceMiddleware}. */
export interface TraceMiddlewareOptions {
/**
* Opaque sink. Errors thrown from `emit` (synchronously or via a rejected
* promise) are swallowed so a flaky sink never breaks the wrapped LLM call.
* Only the first failure is reported, through a single `console.warn`; the
* warn-once flag is module-level, so it is shared by every middleware
* instance created from this module.
*
* A returned promise is awaited on the `doGenerate` path before the result
* is handed back; on the `doStream` path it is not awaited.
*/
emit: (event: TraceEvent) => void | Promise<void>
/**
* Discriminator threaded into the event's `kind` field. Defaults to
* `'eval-trace'`.
*/
kind?: TraceEventKind
/**
* Optional cost resolver. When supplied, called with the modelId and the
* V3 usage shape (in that order); result is set on `event.costUsd`. `usage`
* is `undefined` when a stream closed without a `finish` part. Useful when
* the caller has a side-channel pricing table (the budgetMiddleware's
* tracker) and wants costs in the trace event itself.
*/
getCostUsd?: (modelId: string, usage: LanguageModelV3Usage | undefined) => number
/**
* Optional caller-supplied tags attached to every emitted event. The same
* object reference is placed on each event (it is not copied).
*/
tags?: Record<string, string>
}
// ============================================================================
// Helpers
// ============================================================================
/**
 * Render a structured V3 prompt as plain text, one `[role] …` line per
 * message, joined with newlines.
 *
 * - System messages, and any message whose content is already a string, are
 *   emitted verbatim after the role tag.
 * - Array content is reduced part by part: text parts contribute their text,
 *   every other part contributes only a `[type]` marker so binary payloads
 *   and tool results do not bloat the trace. Parts are separated by a space.
 * - Messages whose content is neither a string nor an array are skipped.
 *
 * @param params - Call options whose `prompt` is flattened.
 * @returns The flattened prompt; empty string for an empty prompt.
 */
function stringifyPrompt(params: LanguageModelV3CallOptions): string {
  const lines: string[] = []
  for (const message of params.prompt) {
    // String-valued content needs no flattening, only the role tag.
    if (message.role === 'system' || typeof message.content === 'string') {
      lines.push(`[${message.role}] ${message.content}`)
      continue
    }
    if (!Array.isArray(message.content)) continue

    const pieces: string[] = []
    for (const piece of message.content) {
      pieces.push(piece.type === 'text' ? piece.text : `[${piece.type}]`)
    }
    lines.push(`[${message.role}] ${pieces.join(' ')}`)
  }
  return lines.join('\n')
}
/**
 * Collapse a V3 generate result's `content` array into one string.
 *
 * Text parts are copied verbatim, reasoning parts are prefixed with
 * `[reasoning] `, and every other part is reduced to a `[type]` marker.
 * The rendered parts are concatenated with no separator.
 *
 * @param content - The `content` array of a generate result.
 * @returns The flattened response; empty string for empty content.
 */
function stringifyContent(content: LanguageModelV3GenerateResult['content']): string {
  return content
    .map((item) => {
      switch (item.type) {
        case 'text':
          return item.text
        case 'reasoning':
          return `[reasoning] ${item.text}`
        default:
          return `[${item.type}]`
      }
    })
    .join('')
}
/**
 * Module-wide latch: flips to `true` after the first sink failure has been
 * reported, so later failures stay silent.
 */
let _hasWarnedEmit = false

/**
 * Deliver `event` to the caller's sink without ever letting a sink failure
 * escape. `emit` is invoked inside the `try`, so both a synchronous throw
 * and a rejected promise are caught. Only the first failure is logged.
 *
 * @param emit - Caller-supplied sink.
 * @param event - Event to deliver.
 * @returns A promise that always resolves, whether or not the sink failed.
 */
async function safeEmit(emit: TraceMiddlewareOptions['emit'], event: TraceEvent): Promise<void> {
  try {
    await emit(event)
    return
  } catch (err) {
    if (_hasWarnedEmit) return
    _hasWarnedEmit = true
    const reason = err instanceof Error ? err.message : String(err)
    // eslint-disable-next-line no-console
    console.warn(
      `[ai-functions/traceMiddleware] emit() threw — subsequent emit errors will be silenced. ${reason}`
    )
  }
}
// ============================================================================
// Middleware
// ============================================================================
/**
 * Build a trace middleware for `wrapLanguageModel`. Emits a
 * {@link TraceEvent} on every successful `doGenerate` / `doStream`
 * completion. If `doGenerate` / `doStream` rejects, the rejection propagates
 * unchanged and no event is emitted.
 *
 * Tracing is best-effort and must never break the wrapped LLM call:
 * - errors from `emit` are swallowed (one-time warn);
 * - errors from the optional `getCostUsd` resolver are swallowed too (one
 *   warn per middleware instance) and the event is emitted without
 *   `costUsd`.
 *
 * On the generate path `emit` is awaited before the result is returned. On
 * the stream path the event is built when the stream is flushed and `emit`
 * is not awaited; the streamed response contains text deltas only.
 *
 * @param options - Sink, event kind, optional cost resolver and tags.
 * @returns A V3 middleware exposing `wrapGenerate` and `wrapStream`.
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { traceMiddleware, getEvalLogStore } from 'ai-functions'
 *
 * const store = getEvalLogStore()
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: traceMiddleware({
 *     kind: 'cascade-trace',
 *     emit: (event) => store.record({ ...event, costUsd: event.costUsd ?? 0 }),
 *   }),
 * })
 * ```
 */
export function traceMiddleware(options: TraceMiddlewareOptions): LanguageModelV3Middleware {
  const { emit, kind = 'eval-trace', getCostUsd, tags } = options

  // One warning per middleware instance is enough to surface a broken
  // resolver without flooding the console on every call.
  let hasWarnedCost = false

  /**
   * Produce the optional `costUsd` field. A throwing resolver yields no
   * field instead of an exception, so it can neither reject `wrapGenerate`
   * nor error the stream from inside `flush`.
   */
  const resolveCostField = (
    modelId: string,
    usage: LanguageModelV3Usage | undefined
  ): Pick<TraceEvent, 'costUsd'> => {
    if (getCostUsd === undefined) return {}
    try {
      return { costUsd: getCostUsd(modelId, usage) }
    } catch (err) {
      if (!hasWarnedCost) {
        hasWarnedCost = true
        // eslint-disable-next-line no-console
        console.warn(
          `[ai-functions/traceMiddleware] getCostUsd() threw — costUsd omitted; subsequent resolver errors will be silenced. ${
            err instanceof Error ? err.message : String(err)
          }`
        )
      }
      return {}
    }
  }

  /** Assemble the event payload shared by the generate and stream paths. */
  const buildEvent = (
    modelId: string,
    params: LanguageModelV3CallOptions,
    response: string,
    usage: LanguageModelV3Usage | undefined,
    durationMs: number
  ): TraceEvent => ({
    kind,
    model: modelId,
    prompt: stringifyPrompt(params),
    response,
    usage,
    durationMs,
    ...resolveCostField(modelId, usage),
    ...(tags !== undefined ? { tags } : {}),
  })

  return {
    specificationVersion: 'v3',

    async wrapGenerate({ doGenerate, params, model }) {
      const start = Date.now()
      const result = await doGenerate()
      // Measure before any trace work so the sink's latency is not counted.
      const durationMs = Date.now() - start
      const event = buildEvent(
        model.modelId,
        params,
        stringifyContent(result.content),
        result.usage,
        durationMs
      )
      await safeEmit(emit, event)
      return result
    },

    async wrapStream({ doStream, params, model }) {
      const start = Date.now()
      const result = await doStream()
      const modelId = model.modelId
      let finalUsage: LanguageModelV3Usage | undefined
      const collected: string[] = []

      const transformedStream = result.stream.pipeThrough(
        new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({
          transform(chunk, controller) {
            // Observe only: every chunk is forwarded untouched.
            if (chunk.type === 'text-delta') collected.push(chunk.delta)
            else if (chunk.type === 'finish') finalUsage = chunk.usage
            controller.enqueue(chunk)
          },
          flush() {
            const durationMs = Date.now() - start
            const event = buildEvent(modelId, params, collected.join(''), finalUsage, durationMs)
            // Fire-and-forget: returning the promise from flush() would hold
            // the stream open until the sink settles, so it is deliberately
            // not awaited. safeEmit never rejects, so nothing goes unhandled.
            void safeEmit(emit, event)
          },
        })
      )

      const wrapped: LanguageModelV3StreamResult = {
        ...result,
        stream: transformedStream,
      }
      return wrapped
    },
  }
}