UNPKG

@tanstack/ai

Version:

Type-safe TypeScript AI SDK for streaming chat, tool calling, agents, structured outputs, and multimodal generation.

1,424 lines (1,307 loc) 106 kB
/** * Text Activity * * Handles agentic text generation, one-shot text generation, and agentic structured output. * This is a self-contained module with implementation, types, and JSDoc. */ import { devtoolsMiddleware } from '@tanstack/ai-event-client' import { stripToSpecMiddleware } from '../../strip-to-spec-middleware' import { streamToText } from '../../stream-to-response.js' import { resolveDebugOption } from '../../logger/resolve' import { EventType } from '../../types' import { normalizeToolResult } from '../../utilities/tool-result' import { LazyToolManager } from './tools/lazy-tool-manager' import { MiddlewareAbortError, ToolCallManager, executeToolCalls, } from './tools/tool-calls' import { convertSchemaToJsonSchema, isStandardSchema, parseWithStandardSchema, } from './tools/schema-converter' import { maxIterations as maxIterationsStrategy } from './agent-loop-strategies' import { convertMessagesToModelMessages, generateMessageId } from './messages' import { MiddlewareRunner } from './middleware/compose' import { CapabilityRegistry } from './middleware/capabilities' import { validateCapabilities } from './middleware/validate' import { MCPManager } from './mcp/manager' import type { ApprovalRequest, ClientToolRequest, ToolResult, } from './tools/tool-calls' import type { AnyTextAdapter, StructuredOutputOptions } from './adapter' import type { AgentLoopStrategy, AnyTool, ConstrainedModelMessage, CustomEvent, InferSchemaType, JSONSchema, ModelMessage, RunFinishedEvent, SchemaInput, StreamChunk, StructuredOutputCompleteEvent, StructuredOutputStream, TextMessageContentEvent, TextOptions, ToolCall, ToolCallArgsEvent, ToolCallEndEvent, ToolCallStartEvent, UIMessage, } from '../../types' import type { AnyChatMiddleware, ChatMiddleware, ChatMiddlewareConfig, ChatMiddlewareContext, StructuredOutputMiddlewareConfig, } from './middleware/types' import type { CheckCoverage } from './middleware/builder' import type { SystemPrompt } from '../../system-prompts' import 
type { InternalLogger } from '../../logger/internal-logger' import type { DebugOption } from '../../logger/types' import type { ProviderTool } from '../../tools/provider-tool' import type { ContextFromMiddleware, ContextFromTool, DefinedContext, MergeContext, UnionToIntersection, } from './runtime-context-types' import type { ChatMCPOptions } from './mcp/types' // =========================== // Activity Kind // =========================== /** The adapter kind this activity handles */ export const kind = 'text' as const type AnyRuntimeTool = AnyTool // The leaf context-inference primitives (KnownContext, MergeContext, // UnionToIntersection, DefinedContext, ContextFromTool, ContextFromMiddleware) // are shared with the tool execution layer — see ./runtime-context-types. type ContextFromConsumer<T> = ContextFromTool<T> | ContextFromMiddleware<T> type RequiredContextFromConsumerUnion<T> = T extends unknown ? undefined extends ContextFromConsumer<T> ? never : ContextFromConsumer<T> : never type ContextFromConsumerUnion<T> = [ UnionToIntersection<DefinedContext<ContextFromConsumer<T>>>, ] extends [never] ? never : [RequiredContextFromConsumerUnion<T>] extends [never] ? UnionToIntersection<DefinedContext<ContextFromConsumer<T>>> | undefined : UnionToIntersection<DefinedContext<ContextFromConsumer<T>>> type ContextFromArray<T> = T extends readonly [infer THead, ...infer TTail] ? MergeContext<ContextFromConsumer<THead>, ContextFromArray<TTail>> : T extends ReadonlyArray<infer TItem> ? ContextFromConsumerUnion<TItem> : never type ContextFromInputs<TTools, TMiddleware> = MergeContext< ContextFromArray<NonNullable<TTools>>, ContextFromArray<NonNullable<TMiddleware>> > type InferredContext<TTools, TMiddleware> = [ ContextFromInputs<TTools, TMiddleware>, ] extends [never] ? unknown : ContextFromInputs<TTools, TMiddleware> type RequiredContextFromInputs<TTools, TMiddleware> = [ ContextFromInputs<TTools, TMiddleware>, ] extends [never] ? 
{ context?: unknown } : undefined extends ContextFromInputs<TTools, TMiddleware> ? { context?: ContextFromInputs<TTools, TMiddleware> } : { context: ContextFromInputs<TTools, TMiddleware> } type TextActivityOptionsWithContext< TAdapter extends AnyTextAdapter, TSchema extends SchemaInput | undefined, TStream extends boolean, TTools extends TextActivityOptions<TAdapter, TSchema, TStream, any>['tools'], TMiddleware extends TextActivityOptions< TAdapter, TSchema, TStream, any >['middleware'], > = Omit< TextActivityOptions<TAdapter, TSchema, TStream, any>, 'tools' | 'middleware' | 'context' > & { tools?: TTools middleware?: TMiddleware & CheckCoverage<Extract<TMiddleware, ReadonlyArray<AnyChatMiddleware>>> } & RequiredContextFromInputs<TTools, TMiddleware> // =========================== // Activity Options Type // =========================== /** * Options for the text activity. * Types are extracted directly from the adapter (which has pre-resolved generics). * * @template TAdapter - The text adapter type (created by a provider function) * @template TSchema - Optional Standard Schema for structured output * @template TStream - Whether to stream the output (default: true) */ export interface TextActivityOptions< TAdapter extends AnyTextAdapter, TSchema extends SchemaInput | undefined, TStream extends boolean, TContext = unknown, > { /** The text adapter to use (created by a provider function like openaiText('gpt-4o')) */ adapter: TAdapter /** * Conversation messages. Accepts: * - `ConstrainedModelMessage` — content types constrained by the adapter's input modalities. * - `ModelMessage` — unconstrained model message (e.g., forwarded from an AG-UI wire payload). * - `UIMessage` — parts-based UI representation; converted internally via `convertMessagesToModelMessages`. * * The three shapes can be mixed in a single array (e.g., when forwarding a wire payload that includes both anchor UIMessages and AG-UI fan-out ModelMessages). 
*/ messages?: | Array< | UIMessage | ModelMessage | ConstrainedModelMessage<{ inputModalities: TAdapter['~types']['inputModalities'] messageMetadataByModality: TAdapter['~types']['messageMetadataByModality'] }> > | undefined /** * System prompts to prepend to the conversation. * * Accepts plain strings or `{ content, metadata }` objects. The `metadata` * field is typed by the adapter — Anthropic narrows it to * `AnthropicSystemPromptMetadata` (with `cache_control` for prompt * caching), providers without per-prompt metadata reject the field * entirely. */ systemPrompts?: | Array<SystemPrompt<TAdapter['~types']['systemPromptMetadata']>> | undefined /** * Tools for function calling (auto-executed when called). * * Accepts two shapes: * - User-defined tools via `toolDefinition()` — plain `Tool`, always assignable. * - Provider tools from `@tanstack/ai-<provider>/tools` (e.g. `webSearchTool`) * — branded and type-checked against the selected model's * `supports.tools` list. Passing an unsupported tool produces a * compile-time error on the array element. */ tools?: | Array< | (AnyRuntimeTool & { readonly '~toolKind'?: never }) | ProviderTool<string, TAdapter['~types']['toolCapabilities'][number]> > | undefined /** * Hand MCP clients/pools to chat(): their tools are discovered at run start * and merged into the run; `connection` controls whether chat() closes them * when the run ends. See docs/tools/mcp.md "Managing MCP clients with chat()". */ mcp?: ChatMCPOptions /** Additional metadata to attach to the request. 
*/ metadata?: TextOptions['metadata'] /** Model-specific provider options (type comes from adapter) */ modelOptions?: TAdapter['~types']['providerOptions'] /** AbortController for cancellation */ abortController?: TextOptions['abortController'] /** Strategy for controlling the agent loop */ agentLoopStrategy?: TextOptions['agentLoopStrategy'] /** Unique conversation identifier for tracking */ conversationId?: TextOptions['conversationId'] /** Thread/conversation ID for AG-UI protocol. Auto-generated if not provided. */ threadId?: TextOptions['threadId'] /** Run ID override for AG-UI protocol. Auto-generated by adapter if not provided. */ runId?: TextOptions['runId'] /** Parent run ID for AG-UI protocol nested run correlation. */ parentRunId?: TextOptions['parentRunId'] /** * Optional Standard Schema for structured output. * When provided, the activity will: * 1. Run the full agentic loop (executing tools as needed) * 2. Once complete, return a Promise with the parsed output matching the schema * * Supports any Standard Schema compliant library (Zod v4+, ArkType, Valibot, etc.) * * @example * ```ts * const result = await chat({ * adapter: openaiText('gpt-4o'), * messages: [{ role: 'user', content: 'Generate a person' }], * outputSchema: z.object({ name: z.string(), age: z.number() }) * }) * // result is { name: string, age: number } * ``` */ outputSchema?: TSchema /** * Whether to stream the text result. * When true (default), returns an AsyncIterable<StreamChunk> for streaming output. * When false, returns a Promise<string> with the collected text content. * * Note: If outputSchema is provided, this option is ignored and the result * is always a Promise<InferSchemaType<TSchema>>. * * @default true * * @example Non-streaming text * ```ts * const text = await chat({ * adapter: openaiText('gpt-4o'), * messages: [{ role: 'user', content: 'Hello!' 
}], * stream: false * }) * // text is a string with the full response * ``` */ stream?: TStream /** * Optional middleware array for observing/transforming chat behavior. * Middleware hooks are called in array order. See {@link ChatMiddleware} for available hooks. * * @example * ```ts * const stream = chat({ * adapter: openaiText('gpt-4o'), * messages: [...], * middleware: [loggingMiddleware, redactionMiddleware], * }) * ``` */ middleware?: Array<ChatMiddleware<TContext>> /** * Runtime context value passed to middleware hooks and server tools. */ context?: TContext /** * Enable debug logging. Pass `true` to enable all categories with the default * console logger, `false` to silence everything, or a `DebugConfig` object for * granular control and/or a custom `Logger`. Defaults to `undefined`, which * means only the `errors` category is active. */ debug?: DebugOption } // =========================== // Chat Options Helper // =========================== /** * Create typed options for the chat() function without executing. * This is useful for pre-defining configurations with full type inference. 
* * @example * ```ts * const chatOptions = createChatOptions({ * adapter: anthropicText('claude-sonnet-4-5'), * }) * * const stream = chat({ ...chatOptions, messages }) * ``` */ export function createChatOptions< TAdapter extends AnyTextAdapter, TSchema extends SchemaInput | undefined = undefined, TStream extends boolean = true, const TTools extends TextActivityOptions< TAdapter, TSchema, TStream, any >['tools'] = TextActivityOptions<TAdapter, TSchema, TStream, any>['tools'], const TMiddleware extends TextActivityOptions< TAdapter, TSchema, TStream, any >['middleware'] = TextActivityOptions< TAdapter, TSchema, TStream, any >['middleware'], >( options: TextActivityOptionsWithContext< TAdapter, TSchema, TStream, TTools, TMiddleware >, ): TextActivityOptions< TAdapter, TSchema, TStream, InferredContext<TTools, TMiddleware> > { return options } // =========================== // Activity Result Type // =========================== /** * Result type for the text activity. * - If outputSchema is provided AND stream is explicitly true: * StructuredOutputStream<InferSchemaType<TSchema>> — yields raw JSON deltas * via TEXT_MESSAGE_CONTENT plus a terminal StructuredOutputCompleteEvent * carrying the validated object. * - If outputSchema is provided without explicit stream:true: * Promise<InferSchemaType<TSchema>>. * - If stream is explicitly false (no schema): Promise<string>. * - Otherwise (default): AsyncIterable<StreamChunk>. * * `[TStream] extends [true]` is used (not `TStream extends true`) so that the * default `boolean` value of `TStream` does *not* match the streaming branch. * Without this, plain `chat({ outputSchema })` would type as a stream while * the runtime returns a Promise — see issue #526. */ export type TextActivityResult< TSchema extends SchemaInput | undefined, TStream extends boolean = boolean, > = TSchema extends SchemaInput ? [TStream] extends [true] ? 
StructuredOutputStream<InferSchemaType<TSchema>> : Promise<InferSchemaType<TSchema>> : [TStream] extends [false] ? Promise<string> : AsyncIterable<StreamChunk> // =========================== // ChatEngine Implementation // =========================== interface TextEngineConfig< TAdapter extends AnyTextAdapter, TContext = unknown, TParams extends TextOptions<any, any, TContext> = TextOptions< any, any, TContext >, > { adapter: TAdapter systemPrompts?: Array<SystemPrompt> params: TParams middleware?: Array<ChatMiddleware<TContext>> context?: TContext /** * If set, after the agent loop finishes the engine runs a * structured-output finalization step through the same middleware * pipeline. See `runStructuredFinalization` for the flow. * * - jsonSchema: the JSON Schema to send to the provider * - yieldChunks: when true, finalization chunks are yielded to the caller * (used by runStreamingStructuredOutput). When false, chunks are * consumed internally for middleware visibility but not yielded * (used by runAgenticStructuredOutput). * - validate: optional callback invoked AFTER the structured-output result * is captured but BEFORE the terminal hook fires. If it throws, the * engine records a `finalizationError` and fires `onError` instead of * `onFinish` (per spec §7.3). On success, the returned value is stored * as the validated result and retrievable via * `getValidatedStructuredOutput()`. Used by `runAgenticStructuredOutput` * to perform Standard Schema validation inside the engine. * - nativeCombined: when true, the adapter declared * `supportsCombinedToolsAndSchema()` and the engine wires `jsonSchema` * into the regular `chatStream` call instead of running a separate * finalization round-trip. The agent loop's final-turn text is the * schema-constrained JSON; the engine parses it from accumulated * content. The `'structuredOutput'` middleware phase does NOT fire on * this path — middleware sees the run through `beforeModel` / * `modelStream` as usual. 
*/
  finalStructuredOutput?: {
    jsonSchema: JSONSchema
    yieldChunks: boolean
    validate?: (data: unknown) => unknown
    nativeCombined?: boolean
  }
}

/**
 * Outcome reported by a tool phase. `run()` returns early when the
 * pending-tool-call check yields `'wait'`, and it skips the terminal hook
 * while `toolPhase` is `'wait'`; `'continue'` is what the pending check
 * returns when there is nothing pending.
 */
type ToolPhaseResult = 'continue' | 'stop' | 'wait'

/** The two phases the agent loop alternates between (toggled by `endCycle`). */
type CyclePhase = 'processText' | 'executeToolCalls'

/**
 * Combine two optional AbortSignals into one that aborts when either does.
 *
 * - If one signal is absent, the other is returned as-is (possibly
 *   `undefined`).
 * - If both are present and one is already aborted, that aborted signal is
 *   returned directly (`a` is checked first).
 * - Otherwise a fresh signal is returned that aborts with the `reason` of
 *   whichever input aborts first.
 *
 * (Manual implementation — `AbortSignal.any` requires Node >= 20.3.)
 *
 * @param a - First signal, or `undefined`.
 * @param b - Second signal, or `undefined`.
 * @returns A signal reflecting both inputs, or `undefined` when both are absent.
 */
function combineAbortSignals(
  a: AbortSignal | undefined,
  b: AbortSignal | undefined,
): AbortSignal | undefined {
  if (!a) return b
  if (!b) return a
  if (a.aborted) return a
  if (b.aborted) return b

  const controller = new AbortController()
  const forward = (source: AbortSignal) => (): void => {
    // First abort wins. Detach from BOTH inputs so the signal that did not
    // fire stops holding a listener (and, through it, this controller).
    a.removeEventListener('abort', onAbortA)
    b.removeEventListener('abort', onAbortB)
    controller.abort(source.reason)
  }
  const onAbortA = forward(a)
  const onAbortB = forward(b)
  a.addEventListener('abort', onAbortA, { once: true })
  b.addEventListener('abort', onAbortB, { once: true })
  return controller.signal
}

class TextEngine<
  TAdapter extends AnyTextAdapter,
  TContext = unknown,
  TParams extends TextOptions<any, any, TContext> = TextOptions<
    any,
    any,
    TContext
  >,
> {
  private readonly adapter: TAdapter
  private params: TParams
  private systemPrompts: Array<SystemPrompt>
  private tools: Array<AnyRuntimeTool>
  private readonly loopStrategy: AgentLoopStrategy
  private toolCallManager: ToolCallManager<ReadonlyArray<AnyTool>, TContext>
  private readonly lazyToolManager: LazyToolManager
  private readonly initialMessageCount: number
  private readonly requestId: string
  private readonly streamId: string
  private readonly effectiveRequest?: Request | RequestInit
  private readonly effectiveSignal?: AbortSignal
  private messages: Array<ModelMessage>
  private iterationCount = 0
  private lastFinishReason: string | null = null
  private streamStartTime = 0
  private totalChunkCount = 0
  private currentMessageId: string | null = null
  private accumulatedContent = ''
  private accumulatedThinking: Array<{ content: string; signature?: string }> =
    []
  private currentThinkingContent = ''
  private 
currentThinkingSignature = '' private eventOptions?: Record<string, unknown> | undefined private eventToolNames?: Array<string> private finishedEvent: RunFinishedEvent | null = null private earlyTermination = false private toolPhase: ToolPhaseResult = 'continue' private cyclePhase: CyclePhase = 'processText' // Client state extracted from initial messages (before conversion to ModelMessage) private readonly initialApprovals: Map<string, boolean> private readonly initialClientToolResults: Map<string, any> // AG-UI protocol IDs private readonly threadId: string private readonly runIdOverride?: string private readonly parentRunIdOverride?: string // Middleware support private readonly middlewareRunner: MiddlewareRunner<TContext> private readonly middlewareCtx: ChatMiddlewareContext<TContext> private readonly deferredPromises: Array<Promise<unknown>> = [] private abortReason?: string private readonly middlewareAbortController?: AbortController // Combines the caller's signal with middleware abort() so running tools // observe both cancellation sources via ctx.abortSignal. private readonly toolAbortSignal?: AbortSignal private terminalHookCalled = false private readonly logger: InternalLogger // Structured-output finalization state (populated by runStructuredFinalization) private structuredOutputResult: { data: unknown; rawText: string } | null = null // Native combined mode: tracks whether we've already emitted the synthetic // `structured-output.start` event before the schema-constrained final-turn // text begins streaming. The event must precede the first // TEXT_MESSAGE_START so the client-side StreamProcessor routes the JSON // deltas into a StructuredOutputPart instead of a plain TextPart. private combinedStartEmitted = false // Native combined mode: messageId we want the synthetic // `structured-output.start` (and any error emitted before deltas arrive) // to carry, so the client matches it to the streaming text deltas. 
private combinedStructuredMessageId: string | null = null // Holds the validated value when `finalStructuredOutput.validate` is provided // and succeeds. Distinct from `structuredOutputResult.data` (the raw, // unvalidated payload from the structured-output.complete chunk). private validatedStructuredOutput: unknown = undefined private hasValidatedStructuredOutput = false private finalizationError: { message: string code?: string cause?: unknown } | null = null private readonly finalStructuredOutput?: { jsonSchema: JSONSchema yieldChunks: boolean validate?: (data: unknown) => unknown nativeCombined?: boolean } constructor( config: TextEngineConfig<TAdapter, TContext, TParams>, logger: InternalLogger, ) { this.logger = logger this.adapter = config.adapter this.finalStructuredOutput = config.finalStructuredOutput this.params = config.params this.systemPrompts = config.params.systemPrompts || [] this.loopStrategy = config.params.agentLoopStrategy || maxIterationsStrategy(5) this.initialMessageCount = config.params.messages.length // Extract client state (approvals, client tool results) from original messages BEFORE conversion // This preserves UIMessage parts data that would be lost during conversion to ModelMessage const { approvals, clientToolResults } = this.extractClientStateFromOriginalMessages( config.params.messages as Array<any>, ) this.initialApprovals = approvals this.initialClientToolResults = clientToolResults // Convert messages to ModelMessage format (handles both UIMessage and ModelMessage input) // This ensures consistent internal format regardless of what the client sends this.messages = convertMessagesToModelMessages(config.params.messages) // Initialize lazy tool manager after messages are converted (needs message history for scanning) this.lazyToolManager = new LazyToolManager( config.params.tools || [], this.messages, ) this.tools = this.lazyToolManager.getActiveTools() this.toolCallManager = new ToolCallManager< ReadonlyArray<AnyTool>, TContext 
>(this.tools) this.requestId = this.createId('chat') this.streamId = this.createId('stream') this.effectiveRequest = config.params.abortController ? { signal: config.params.abortController.signal } : undefined this.effectiveSignal = config.params.abortController?.signal // `conversationId` is the legacy alias of `threadId` — accept it // as a fallback so `chat({ conversationId })` keeps working, with // explicit `threadId` winning when both are set. this.threadId = config.params.threadId || config.params.conversationId || this.createId('thread') this.runIdOverride = config.params.runId this.parentRunIdOverride = config.params.parentRunId // Initialize middleware — devtools first, strip-to-spec always last. // handleStreamChunk processes raw chunks BEFORE middleware, so internal // state management sees extended fields (finishReason, delta, toolCallName, etc.). // The strip middleware ensures the yielded public stream is AG-UI spec-compliant. const allMiddleware: Array<ChatMiddleware<TContext>> = [ devtoolsMiddleware(), ...(config.middleware || []), stripToSpecMiddleware(), ] this.middlewareRunner = new MiddlewareRunner(allMiddleware, logger) this.middlewareAbortController = new AbortController() this.toolAbortSignal = combineAbortSignals( this.effectiveSignal, this.middlewareAbortController.signal, ) this.middlewareCtx = { requestId: this.requestId, streamId: this.streamId, runId: this.runIdOverride ?? this.requestId, threadId: this.threadId, // Legacy alias kept on the ctx so middleware that reads // `ctx.conversationId` keeps working. Always equals `threadId`. 
conversationId: this.threadId, phase: 'init', iteration: 0, chunkIndex: 0, signal: this.effectiveSignal, abort: (reason?: string) => { this.abortReason = reason this.middlewareAbortController?.abort(reason) }, context: config.context as TContext, defer: (promise: Promise<unknown>) => { this.deferredPromises.push(promise) }, // Provider / adapter info provider: config.adapter.name, model: config.params.model, source: 'server', streaming: true, // Config-derived (updated in beforeRun and applyMiddlewareConfig) systemPrompts: this.systemPrompts, toolNames: undefined, options: undefined, modelOptions: config.params.modelOptions, // Computed messageCount: this.initialMessageCount, hasTools: this.tools.length > 0, // Mutable per-iteration currentMessageId: null, accumulatedContent: '', // References messages: this.messages, createId: (prefix: string) => this.createId(prefix), // Capability bookkeeping for this request (populated by middleware setup) capabilities: new CapabilityRegistry(), // Convenience accessors that delegate to a capability handle's own // tuple getter/provider, keyed by this context. `getX(ctx)` and // `ctx.get(X)` are interchangeable. get: (capability) => capability[0](this.middlewareCtx), getOptional: (capability) => capability[0](this.middlewareCtx, { optional: true }), provide: (capability, value) => capability[1](this.middlewareCtx, value), } } /** Get the accumulated content after the chat loop completes */ getAccumulatedContent(): string { return this.accumulatedContent } /** Get the final messages array after the chat loop completes */ getMessages(): Array<ModelMessage> { return this.messages } /** Returns the structured-output result if finalization ran successfully. 
*/ getStructuredOutputResult(): { data: unknown; rawText: string } | null { return this.structuredOutputResult } /** * Returns the validated structured-output value (the result of running * `finalStructuredOutput.validate` against the raw structured-output data) * wrapped in a `{ value }` object so callers can distinguish "no validation * happened" from "validation produced undefined". Returns `null` when no * validator was configured or validation hasn't been performed yet. */ getValidatedStructuredOutput(): { value: unknown } | null { return this.hasValidatedStructuredOutput ? { value: this.validatedStructuredOutput } : null } /** Returns the recorded finalization error, if any. */ getFinalizationError(): { message: string code?: string cause?: unknown } | null { return this.finalizationError } async *run(): AsyncGenerator<StreamChunk> { this.beforeRun() this.logger.agentLoop('run started', { threadId: this.middlewareCtx.threadId, }) try { // Provision capabilities before any consumer (onConfig onward) can read them await this.middlewareRunner.runSetup(this.middlewareCtx) // Run initial onConfig (phase = init) this.middlewareCtx.phase = 'init' const initialConfig = this.buildMiddlewareConfig() const transformedConfig = await this.middlewareRunner.runOnConfig( this.middlewareCtx, initialConfig, ) this.applyMiddlewareConfig(transformedConfig) // Run onStart (devtools middleware emits text:request:started and initial messages here) await this.middlewareRunner.runOnStart(this.middlewareCtx) const pendingPhase = yield* this.checkForPendingToolCalls() if (pendingPhase === 'wait') { return } // Skip the agent loop entirely when there are no tools AND a separate // structured-output finalization will run. Without tools the model has // nothing to do in the loop, so executing one iteration would burn an // extra provider call before the finalization request. 
// // Native combined mode does NOT skip — the agent loop itself produces // the schema-constrained final answer in one pass (model emits the // schema-constrained text on its natural final turn). Even with zero // tools, the single chatStream call IS the structured-output call. const skipAgentLoop = !!this.finalStructuredOutput && this.tools.length === 0 && this.finalStructuredOutput.nativeCombined !== true if (!skipAgentLoop) { do { if (this.earlyTermination || this.isCancelled()) { return } this.logger.agentLoop(`iteration=${this.middlewareCtx.iteration}`, { iteration: this.middlewareCtx.iteration, }) await this.beginCycle() if (this.cyclePhase === 'processText') { // Run onConfig before each model call (phase = beforeModel) this.middlewareCtx.phase = 'beforeModel' this.middlewareCtx.iteration = this.iterationCount const iterConfig = this.buildMiddlewareConfig() const iterTransformedConfig = await this.middlewareRunner.runOnConfig( this.middlewareCtx, iterConfig, ) this.applyMiddlewareConfig(iterTransformedConfig) yield* this.streamModelResponse() } else { yield* this.processToolCalls() } this.endCycle() } while (this.shouldContinue()) } this.logger.agentLoop('run finished', { finishReason: this.lastFinishReason, }) // After the agent loop ends, if a structured-output finalization was // requested AND the run hasn't already errored/aborted, run it through // the middleware pipeline. The terminal hook fires once at the very // end (after finalization), not after the agent loop. // // Native combined mode takes a different path: the agent loop's final- // turn text IS the schema-constrained JSON, so we harvest it from // `accumulatedContent` instead of issuing a second provider call. 
if ( this.finalStructuredOutput && !this.isCancelled() && !this.finalizationError ) { if (this.finalStructuredOutput.nativeCombined === true) { yield* this.harvestCombinedStructuredOutput() } else { yield* this.runStructuredFinalization() } } // Call terminal hook (skip when waiting for client — stream is paused, not finished). // Priority: finalizationError → onError; otherwise normal onFinish. // Skip on cancellation — the finally block routes aborts to onAbort. if ( !this.terminalHookCalled && this.toolPhase !== 'wait' && !this.isCancelled() ) { if (this.finalizationError) { this.terminalHookCalled = true const errForHook = new Error( this.finalizationError.message, this.finalizationError.cause !== undefined ? { cause: this.finalizationError.cause } : undefined, ) if (this.finalizationError.code !== undefined) { Object.defineProperty(errForHook, 'code', { value: this.finalizationError.code, enumerable: true, }) } await this.middlewareRunner.runOnError(this.middlewareCtx, { error: errForHook, duration: Date.now() - this.streamStartTime, }) } else { this.terminalHookCalled = true await this.middlewareRunner.runOnFinish(this.middlewareCtx, { finishReason: this.lastFinishReason, duration: Date.now() - this.streamStartTime, content: this.accumulatedContent, usage: this.finishedEvent?.usage, }) } } } catch (error: unknown) { if (!this.terminalHookCalled) { this.terminalHookCalled = true if (error instanceof MiddlewareAbortError) { // Middleware abort decision — call onAbort, not onError this.abortReason = error.message await this.middlewareRunner.runOnAbort(this.middlewareCtx, { reason: error.message, duration: Date.now() - this.streamStartTime, }) } else { // Genuine error — call onError this.logger.errors('chat run failed', { error, threadId: this.middlewareCtx.threadId, }) await this.middlewareRunner.runOnError(this.middlewareCtx, { error, duration: Date.now() - this.streamStartTime, }) } } // Don't rethrow middleware abort errors — the run just stops gracefully if 
(!(error instanceof MiddlewareAbortError)) {
        throw error
      }
    } finally {
      // Check for abort terminal hook: if nothing above fired a terminal hook
      // and the run was cancelled, report it through onAbort exactly once.
      if (!this.terminalHookCalled && this.isCancelled()) {
        this.terminalHookCalled = true
        await this.middlewareRunner.runOnAbort(this.middlewareCtx, {
          reason: this.abortReason,
          duration: Date.now() - this.streamStartTime,
        })
      }

      // Await deferred promises (non-blocking side effects). `allSettled`
      // never rejects, so a failing deferred promise cannot replace the
      // run's own outcome.
      if (this.deferredPromises.length > 0) {
        await Promise.allSettled(this.deferredPromises)
      }
    }
  }

  /**
   * Records the stream start time and publishes request-derived fields
   * (`options`, `toolNames`) onto the middleware context.
   * Tool names are read from `params.tools`.
   */
  private beforeRun(): void {
    this.streamStartTime = Date.now()
    const { tools, metadata } = this.params

    // Gather flattened options into an object for context
    const options: Record<string, unknown> = {}
    if (metadata !== undefined) options.metadata = metadata

    // An empty options object is reported as `undefined` rather than `{}`.
    this.eventOptions = Object.keys(options).length > 0 ? options : undefined
    this.eventToolNames = tools?.map((t) => t.name)

    // Update middleware context with computed fields
    this.middlewareCtx.options = this.eventOptions
    this.middlewareCtx.toolNames = this.eventToolNames
  }

  /** Starts a new iteration, but only when the cycle is in its text phase. */
  private async beginCycle(): Promise<void> {
    if (this.cyclePhase === 'processText') {
      await this.beginIteration()
    }
  }

  /**
   * Flips the cycle phase. `iterationCount` advances only when leaving the
   * tool-call phase, so one iteration spans a text phase plus a tool phase.
   */
  private endCycle(): void {
    if (this.cyclePhase === 'processText') {
      this.cyclePhase = 'executeToolCalls'
      return
    }

    this.cyclePhase = 'processText'
    this.iterationCount++
  }

  /**
   * Resets per-iteration state (message id, accumulated text/thinking,
   * finished event), mirrors it onto the middleware context, and notifies
   * middleware via `runOnIteration`.
   */
  private async beginIteration(): Promise<void> {
    this.currentMessageId = this.createId('msg')
    this.accumulatedContent = ''
    this.accumulatedThinking = []
    this.currentThinkingContent = ''
    this.currentThinkingSignature = ''
    this.finishedEvent = null

    // Update mutable context fields
    this.middlewareCtx.currentMessageId = this.currentMessageId
    this.middlewareCtx.accumulatedContent = ''

    // Notify middleware of new iteration (devtools emits assistant message:created here)
    await this.middlewareRunner.runOnIteration(this.middlewareCtx, {
      iteration: this.iterationCount,
      messageId: this.currentMessageId,
    })
  }

  /**
   * Streams one model turn from the adapter. Each chunk updates internal
   * state first, then passes through middleware before being yielded.
   */
  private async *streamModelResponse(): AsyncGenerator<StreamChunk> {
    const { metadata, modelOptions } =
this.params const tools = this.tools // Convert tool schemas to JSON Schema before passing to adapter const toolsWithJsonSchemas = tools.map((tool) => ({ ...tool, inputSchema: tool.inputSchema ? convertSchemaToJsonSchema(tool.inputSchema) : undefined, outputSchema: tool.outputSchema ? convertSchemaToJsonSchema(tool.outputSchema) : undefined, })) this.middlewareCtx.phase = 'modelStream' const providerName = (this.adapter as { provider?: string }).provider ?? this.adapter.name this.logger.request( `activity=chat provider=${providerName} model=${this.params.model} messages=${this.messages.length} tools=${this.tools.length} stream=true`, { provider: providerName, model: this.params.model, messageCount: this.messages.length, toolCount: this.tools.length, }, ) // When the adapter declared `supportsCombinedToolsAndSchema()`, the // activity layer set `nativeCombined: true` and we forward the // pre-converted JSON Schema into the regular chatStream call. The // adapter wires it into the upstream request (e.g. `response_format`, // `text.format`, `output_format`) so the model's final-turn text is // schema-constrained and the engine can harvest it from the agent loop // without a separate finalization round-trip. const combinedSchema = this.finalStructuredOutput?.nativeCombined === true ? this.finalStructuredOutput.jsonSchema : undefined for await (const chunk of this.adapter.chatStream({ model: this.params.model, messages: this.messages, tools: toolsWithJsonSchemas, metadata, request: this.effectiveRequest, modelOptions, systemPrompts: this.systemPrompts, logger: this.logger, threadId: this.threadId, runId: this.runIdOverride, parentRunId: this.parentRunIdOverride, ...(combinedSchema ? { outputSchema: combinedSchema } : {}), })) { if (this.isCancelled()) { break } this.totalChunkCount++ // Process the original (unstripped) chunk for internal state management // BEFORE middleware, so fields like finishReason, delta, etc. 
are available this.handleStreamChunk(chunk) // Native combined mode: synthesize `structured-output.start` BEFORE // the first TEXT_MESSAGE_START so the client-side StreamProcessor // routes the schema-constrained JSON deltas into a // StructuredOutputPart. We delay synthesis until we actually see // text starting — intermediate tool-call iterations don't need it, // and emitting at run-start would wrap tool-call commentary into a // structured-output part too. if ( this.finalStructuredOutput?.nativeCombined === true && this.finalStructuredOutput.yieldChunks && !this.combinedStartEmitted && chunk.type === EventType.TEXT_MESSAGE_START ) { this.combinedStartEmitted = true const messageId = typeof chunk.messageId === 'string' && chunk.messageId !== '' ? chunk.messageId : generateMessageId() this.combinedStructuredMessageId = messageId const synthStart: StreamChunk = { type: EventType.CUSTOM, name: 'structured-output.start', value: { messageId }, model: this.params.model, timestamp: Date.now(), threadId: this.threadId, ...(this.runIdOverride ? { runId: this.runIdOverride } : {}), } const synthOutputs = await this.middlewareRunner.runOnChunk( this.middlewareCtx, synthStart, ) for (const outputChunk of synthOutputs) { yield outputChunk this.middlewareCtx.chunkIndex++ } } // Pipe chunk through middleware (devtools middleware observes; strip-to-spec cleans) const outputChunks = await this.middlewareRunner.runOnChunk( this.middlewareCtx, chunk, ) // When a streaming structured-output finalization step will run after // the agent loop, suppress the agent-loop's RUN_STARTED/RUN_FINISHED // here — the finalization step emits the single outer lifecycle pair // that reaches the consumer. // // Native combined mode does NOT issue a second adapter stream — the // agent loop's lifecycle IS the outer pair the consumer sees. 
/**
 * Dispatches a stream chunk to the handler that updates this activity's
 * internal bookkeeping for that event type.
 *
 * The streaming loop calls this with the original chunk before it is piped
 * through middleware. Event types that need no bookkeeping are ignored.
 *
 * @param chunk - The stream chunk to inspect.
 */
private handleStreamChunk(chunk: StreamChunk): void {
  // eslint-disable-next-line @typescript-eslint/switch-exhaustiveness-check -- AG-UI EventType enum members vs string-literal case labels; default branch handles untraced events.
  switch (chunk.type) {
    // --- AG-UI events that mutate activity state ---
    case 'TEXT_MESSAGE_CONTENT': {
      this.handleTextMessageContentEvent(chunk)
      return
    }
    case 'TOOL_CALL_START': {
      this.handleToolCallStartEvent(chunk)
      return
    }
    case 'TOOL_CALL_ARGS': {
      this.handleToolCallArgsEvent(chunk)
      return
    }
    case 'TOOL_CALL_END': {
      this.handleToolCallEndEvent(chunk)
      return
    }
    case 'RUN_FINISHED': {
      this.handleRunFinishedEvent(chunk)
      return
    }
    case 'RUN_ERROR': {
      this.handleRunErrorEvent(chunk)
      return
    }
    case 'STEP_STARTED': {
      this.handleStepStartedEvent()
      return
    }
    case 'STEP_FINISHED': {
      this.handleStepFinishedEvent(chunk)
      return
    }

    // --- Events that are recognised but intentionally ignored here ---
    case 'TOOL_CALL_RESULT':
      // The tool result was already appended to the messages by
      // buildToolResultChunks, so there is nothing left to record.
      return
    case 'REASONING_START':
    case 'REASONING_MESSAGE_START':
    case 'REASONING_MESSAGE_CONTENT':
    case 'REASONING_MESSAGE_END':
    case 'REASONING_END':
      // Reasoning events are handled by StreamProcessor.
      return

    default:
      // RUN_STARTED, TEXT_MESSAGE_START, TEXT_MESSAGE_END,
      // STATE_SNAPSHOT, STATE_DELTA, CUSTOM
      // - no special handling needed in chat activity
      return
  }
}
/**
 * Updates the accumulated assistant text from a TEXT_MESSAGE_CONTENT event.
 *
 * A truthy `content` replaces the accumulated text; otherwise `delta` is
 * appended to it. The result is mirrored onto the middleware context.
 */
private handleTextMessageContentEvent(chunk: TextMessageContentEvent): void {
  this.accumulatedContent = chunk.content
    ? chunk.content
    : this.accumulatedContent + chunk.delta
  this.middlewareCtx.accumulatedContent = this.accumulatedContent
}

/** Forwards a TOOL_CALL_START event to the tool call manager. */
private handleToolCallStartEvent(chunk: ToolCallStartEvent): void {
  this.toolCallManager.addToolCallStartEvent(chunk)
}

/** Forwards a TOOL_CALL_ARGS event to the tool call manager. */
private handleToolCallArgsEvent(chunk: ToolCallArgsEvent): void {
  this.toolCallManager.addToolCallArgsEvent(chunk)
}

/** Forwards a TOOL_CALL_END event to the tool call manager as a completed call. */
private handleToolCallEndEvent(chunk: ToolCallEndEvent): void {
  this.toolCallManager.completeToolCall(chunk)
}

/**
 * Stores the RUN_FINISHED event and its finish reason
 * (`null` when the event carries none).
 */
private handleRunFinishedEvent(chunk: RunFinishedEvent): void {
  this.lastFinishReason = chunk.finishReason ?? null
  this.finishedEvent = chunk
}

/** Flags the run for early termination; the error chunk itself is not inspected. */
private handleRunErrorEvent(
  _chunk: Extract<StreamChunk, { type: 'RUN_ERROR' }>,
): void {
  this.earlyTermination = true
}

/**
 * Moves the in-progress thinking content (plus its signature, when one was
 * captured) into `accumulatedThinking` and resets both buffers.
 * Does nothing when no thinking content has been buffered.
 */
private finalizeCurrentThinkingStep(): void {
  if (!this.currentThinkingContent) {
    return
  }

  const signature = this.currentThinkingSignature
  this.accumulatedThinking.push({
    content: this.currentThinkingContent,
    // Only attach a signature when one was actually recorded.
    ...(signature ? { signature } : {}),
  })

  this.currentThinkingContent = ''
  this.currentThinkingSignature = ''
}

/** A new step begins: flush whatever thinking content the previous step buffered. */
private handleStepStartedEvent(): void {
  this.finalizeCurrentThinkingStep()
}

/**
 * Buffers thinking output from a STEP_FINISHED event: a truthy `delta` is
 * appended to the current thinking content, and a truthy `signature`
 * replaces the current signature.
 */
private handleStepFinishedEvent(
  chunk: Extract<StreamChunk, { type: 'STEP_FINISHED' }>,
): void {
  const { delta, signature } = chunk
  if (delta) {
    this.currentThinkingContent += delta
  }
  if (signature) {
    this.currentThinkingSignature = signature
  }
}

/**
 * Executes the tool calls reported by `getPendingToolCallsFromMessages()`,
 * streaming every chunk they produce through the middleware pipeline.
 *
 * @returns `'continue'` when there was nothing pending or every call was
 *   resolved, `'wait'` when at least one call needs approval or client-side
 *   execution, and `'stop'` when middleware aborted during execution.
 */
private async *checkForPendingToolCalls(): AsyncGenerator<
  StreamChunk,
  ToolPhaseResult,
  void
> {
  const pendingToolCalls = this.getPendingToolCallsFromMessages()
  if (pendingToolCalls.length === 0) {
    return 'continue'
  }

  const syntheticFinish = this.createSyntheticFinishedEvent()

  // Calls that target a lazy tool which has not been discovered yet are not
  // executed; each one gets a self-correcting error result instead.
  const lazyToolErrors: Array<ToolResult> = []
  const runnableCalls = pendingToolCalls.filter((call) => {
    const toolName = call.function.name
    if (!this.lazyToolManager.isUndiscoveredLazyTool(toolName)) {
      return true
    }
    lazyToolErrors.push({
      toolCallId: call.id,
      toolName,
      result: {
        error: this.lazyToolManager.getUndiscoveredToolError(toolName),
      },
      state: 'output-error',
    })
    return false
  })

  if (lazyToolErrors.length > 0) {
    for (const chunk of this.buildToolResultChunks(
      lazyToolErrors,
      syntheticFinish,
    )) {
      yield* this.pipeThroughMiddleware(chunk)
    }
  }

  if (runnableCalls.length === 0) {
    return 'continue'
  }

  const { approvals, clientToolResults } = this.collectClientState()

  const toolRun = executeToolCalls(
    runnableCalls,
    this.tools,
    approvals,
    clientToolResults,
    (eventName, data) => this.createCustomEventChunk(eventName, data),
    {
      onBeforeToolCall: async (toolCall, tool, args) => {
        const name = toolCall.function.name
        this.logger.tools(`phase=before name=${name}`, { name, args })
        const beforeCtx = {
          toolCall,
          tool,
          args,
          toolName: name,
          toolCallId: toolCall.id,
        }
        return this.middlewareRunner.runOnBeforeToolCall(
          this.middlewareCtx,
          beforeCtx,
        )
      },
      onAfterToolCall: async (info) => {
        const { toolName, result } = info
        this.logger.tools(`phase=after name=${toolName}`, {
          name: toolName,
          result,
        })
        await this.middlewareRunner.runOnAfterToolCall(this.middlewareCtx, info)
      },
    },
    this.middlewareCtx.context,
    this.toolAbortSignal,
  )

  // Drain the execution generator: its custom events are yielded onward and
  // its return value is the execution summary.
  const outcome = yield* this.drainToolCallGenerator(toolRun)

  // Middleware may have aborted while the pending tools were executing.
  if (this.isMiddlewareAborted()) {
    this.setToolPhase('stop')
    return 'stop'
  }

  const { results, needsApproval, needsClientExecution } = outcome

  // Tell middleware the tool phase is complete (devtools emits its aggregate
  // events from this hook).
  await this.middlewareRunner.runOnToolPhaseComplete(this.middlewareCtx, {
    toolCalls: pendingToolCalls,
    results,
    needsApproval,
    needsClientExecution,
  })

  // Arguments keyed by tool call id, so buildToolResultChunks can emit
  // TOOL_CALL_START + TOOL_CALL_ARGS before TOOL_CALL_END during
  // continuation re-executions.
  const argsByCallId = new Map<string, string>(
    pendingToolCalls.map((call): [string, string] => [
      call.id,
      call.function.arguments,
    ]),
  )

  const mustWait =
    needsApproval.length > 0 || needsClientExecution.length > 0

  if (!mustWait) {
    // Everything resolved: emit the results and let the loop continue.
    for (const chunk of this.buildToolResultChunks(
      results,
      syntheticFinish,
      argsByCallId,
    )) {
      yield* this.pipeThroughMiddleware(chunk)
    }
    return 'continue'
  }

  // Some calls are blocked on the client. Emit any results that did complete,
  // then the approval requests, then the client-execution requests.
  if (results.length > 0) {
    for (const chunk of this.buildToolResultChunks(
      results,
      syntheticFinish,
      argsByCallId,
    )) {
      yield* this.pipeThroughMiddleware(chunk)
    }
  }

  for (const chunk of this.buildApprovalChunks(
    needsApproval,
    syntheticFinish,
  )) {
    yield* this.pipeThroughMiddleware(chunk)
  }

  for (const chunk of this.buildClientToolChunks(
    needsClientExecution,
    syntheticFinish,
  )) {
    yield* this.pipeThroughMiddleware(chunk)
  }

  this.setToolPhase('wait')
  return 'wait'
}
(undiscoveredLazyResults.length > 0 && this.finishedEvent) { for (const chunk of this.buildToolResultChunks( undiscoveredLazyResults, this.finishedEvent, )) { yield* this.pipeThroughMiddleware(chunk) } } if (executableToolCalls.length === 0) { // All tool calls were undiscovered lazy tools — errors emitted, continue loop this.toolCallManager.clear() this.setToolPhase('continue') return } this.middlewareCtx.phase = 'beforeTools' const { approvals, clientToolResults } = this.collectClientState() const generator = executeToolCalls( executableToolCalls, this.tools, approvals, clientToolResults, (eventName, data) => this.createCustomEventChunk(eventName, data), { onBeforeToolCall: async (toolCall, tool, args) => { this.logger.tools(`phase=before name=${toolCall.function.name}`, { name: toolCall.function.name, args, }) const hookCtx = { toolCall, tool, args, toolName: tool