ai

AI SDK by Vercel - The AI Toolkit for TypeScript and JavaScript

1,478 lines (1,321 loc) 86.9 kB
import { getErrorMessage, LanguageModelV3, SharedV3Warning, UnsupportedFunctionalityError } from '@ai-sdk/provider';
import { createIdGenerator, DelayedPromise, IdGenerator, isAbortError, ProviderOptions, ToolApprovalResponse, ToolContent } from '@ai-sdk/provider-utils';
import { Span } from '@opentelemetry/api';
import { ServerResponse } from 'node:http';
import { NoOutputGeneratedError } from '../error';
import { logWarnings } from '../logger/log-warnings';
import { resolveLanguageModel } from '../model/resolve-model';
import { CallSettings, getChunkTimeoutMs, getStepTimeoutMs, getTotalTimeoutMs } from '../prompt/call-settings';
import { convertToLanguageModelPrompt } from '../prompt/convert-to-language-model-prompt';
import { createToolModelOutput } from '../prompt/create-tool-model-output';
import { prepareCallSettings } from '../prompt/prepare-call-settings';
import { prepareToolsAndToolChoice } from '../prompt/prepare-tools-and-tool-choice';
import { Prompt } from '../prompt/prompt';
import { standardizePrompt } from '../prompt/standardize-prompt';
import { wrapGatewayError } from '../prompt/wrap-gateway-error';
import { assembleOperationName } from '../telemetry/assemble-operation-name';
import { getBaseTelemetryAttributes } from '../telemetry/get-base-telemetry-attributes';
import { getTracer } from '../telemetry/get-tracer';
import { recordSpan } from '../telemetry/record-span';
import { selectTelemetryAttributes } from '../telemetry/select-telemetry-attributes';
import { stringifyForTelemetry } from '../telemetry/stringify-for-telemetry';
import { TelemetrySettings } from '../telemetry/telemetry-settings';
import { createTextStreamResponse } from '../text-stream/create-text-stream-response';
import { pipeTextStreamToResponse } from '../text-stream/pipe-text-stream-to-response';
import { LanguageModelRequestMetadata } from '../types';
import { CallWarning, FinishReason, LanguageModel, ToolChoice } from '../types/language-model';
import { ProviderMetadata } from '../types/provider-metadata';
import { addLanguageModelUsage, createNullLanguageModelUsage, LanguageModelUsage } from '../types/usage';
import { UIMessage } from '../ui';
import { createUIMessageStreamResponse } from '../ui-message-stream/create-ui-message-stream-response';
import { getResponseUIMessageId } from '../ui-message-stream/get-response-ui-message-id';
import { handleUIMessageStreamFinish } from '../ui-message-stream/handle-ui-message-stream-finish';
import { pipeUIMessageStreamToResponse } from '../ui-message-stream/pipe-ui-message-stream-to-response';
import { InferUIMessageChunk, UIMessageChunk } from '../ui-message-stream/ui-message-chunks';
import { UIMessageStreamResponseInit } from '../ui-message-stream/ui-message-stream-response-init';
import { InferUIMessageData, InferUIMessageMetadata } from '../ui/ui-messages';
import { asArray } from '../util/as-array';
import { AsyncIterableStream, createAsyncIterableStream } from '../util/async-iterable-stream';
import { consumeStream } from '../util/consume-stream';
import { createStitchableStream } from '../util/create-stitchable-stream';
import { DownloadFunction } from '../util/download/download-function';
import { mergeAbortSignals } from '../util/merge-abort-signals';
import { mergeObjects } from '../util/merge-objects';
import { now as originalNow } from '../util/now';
import { prepareRetries } from '../util/prepare-retries';
import { collectToolApprovals } from './collect-tool-approvals';
import { ContentPart } from './content-part';
import { executeToolCall } from './execute-tool-call';
import { Output, text } from './output';
import { InferCompleteOutput, InferElementOutput, InferPartialOutput } from './output-utils';
import { PrepareStepFunction } from './prepare-step';
import { ResponseMessage } from './response-message';
import { runToolsTransformation, SingleRequestTextStreamPart } from './run-tools-transformation';
import { DefaultStepResult, StepResult } from './step-result';
import { isStopConditionMet, stepCountIs, StopCondition } from './stop-condition';
import { ConsumeStreamOptions, StreamTextResult, TextStreamPart, UIMessageStreamOptions } from './stream-text-result';
import { toResponseMessages } from './to-response-messages';
import { TypedToolCall } from './tool-call';
import { ToolCallRepairFunction } from './tool-call-repair-function';
import { ToolOutput } from './tool-output';
import { StaticToolOutputDenied } from './tool-output-denied';
import { ToolSet } from './tool-set';

const originalGenerateId = createIdGenerator({ prefix: 'aitxt', size: 24 });

/**
 * A transformation that is applied to the stream.
 *
 * @param stopStream - A function that stops the source stream.
 * @param tools - The tools that are accessible to and can be called by the model. The model needs to support calling tools.
 */
export type StreamTextTransform<TOOLS extends ToolSet> = (options: {
  tools: TOOLS; // for type inference
  stopStream: () => void;
}) => TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>>;
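// Illustrative sketch (not part of the upstream source): one possible
// StreamTextTransform that upper-cases streamed text deltas and passes every
// other stream part through unchanged. It only demonstrates the transform
// shape described above; the name and behavior are made up for this listing.
function exampleUpperCaseTransform<
  TOOLS extends ToolSet,
>(): StreamTextTransform<TOOLS> {
  return () =>
    new TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>>({
      transform(chunk, controller) {
        controller.enqueue(
          chunk.type === 'text-delta'
            ? { ...chunk, text: chunk.text.toUpperCase() }
            : chunk,
        );
      },
    });
}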
/**
 * Callback that is set using the `onError` option.
 *
 * @param event - The event that is passed to the callback.
 */
export type StreamTextOnErrorCallback = (event: {
  error: unknown;
}) => PromiseLike<void> | void;

/**
 * Callback that is set using the `onStepFinish` option.
 *
 * @param stepResult - The result of the step.
 */
export type StreamTextOnStepFinishCallback<TOOLS extends ToolSet> = (
  stepResult: StepResult<TOOLS>,
) => PromiseLike<void> | void;

/**
 * Callback that is set using the `onChunk` option.
 *
 * @param event - The event that is passed to the callback.
 */
export type StreamTextOnChunkCallback<TOOLS extends ToolSet> = (event: {
  chunk: Extract<
    TextStreamPart<TOOLS>,
    {
      type:
        | 'text-delta'
        | 'reasoning-delta'
        | 'source'
        | 'tool-call'
        | 'tool-input-start'
        | 'tool-input-delta'
        | 'tool-result'
        | 'raw';
    }
  >;
}) => PromiseLike<void> | void;

/**
 * Callback that is set using the `onFinish` option.
 *
 * @param event - The event that is passed to the callback.
 */
export type StreamTextOnFinishCallback<TOOLS extends ToolSet> = (
  event: StepResult<TOOLS> & {
    /**
     * Details for all steps.
     */
    readonly steps: StepResult<TOOLS>[];

    /**
     * Total usage for all steps. This is the sum of the usage of all steps.
     */
    readonly totalUsage: LanguageModelUsage;

    /**
     * Context that is passed into tool execution.
     *
     * Experimental (can break in patch releases).
     *
     * @default undefined
     */
    experimental_context: unknown;
  },
) => PromiseLike<void> | void;

/**
 * Callback that is set using the `onAbort` option.
 *
 * @param event - The event that is passed to the callback.
 */
export type StreamTextOnAbortCallback<TOOLS extends ToolSet> = (event: {
  /**
   * Details for all previously finished steps.
   */
  readonly steps: StepResult<TOOLS>[];
}) => PromiseLike<void> | void;

/**
 * Generate a text and call tools for a given prompt using a language model.
 *
 * This function streams the output. If you do not want to stream the output, use `generateText` instead.
 *
 * @param model - The language model to use.
 * @param tools - Tools that are accessible to and can be called by the model. The model needs to support calling tools.
 *
 * @param system - A system message that will be part of the prompt.
 * @param prompt - A simple text prompt. You can either use `prompt` or `messages` but not both.
 * @param messages - A list of messages. You can either use `prompt` or `messages` but not both.
 *
 * @param maxOutputTokens - Maximum number of tokens to generate.
 * @param temperature - Temperature setting.
 * The value is passed through to the provider. The range depends on the provider and model.
 * It is recommended to set either `temperature` or `topP`, but not both.
 * @param topP - Nucleus sampling.
 * The value is passed through to the provider. The range depends on the provider and model.
 * It is recommended to set either `temperature` or `topP`, but not both.
 * @param topK - Only sample from the top K options for each subsequent token.
 * Used to remove "long tail" low probability responses.
 * Recommended for advanced use cases only. You usually only need to use temperature.
 * @param presencePenalty - Presence penalty setting.
 * It affects the likelihood of the model to repeat information that is already in the prompt.
 * The value is passed through to the provider. The range depends on the provider and model.
 * @param frequencyPenalty - Frequency penalty setting.
 * It affects the likelihood of the model to repeatedly use the same words or phrases.
 * The value is passed through to the provider. The range depends on the provider and model.
 * @param stopSequences - Stop sequences.
 * If set, the model will stop generating text when one of the stop sequences is generated.
 * @param seed - The seed (integer) to use for random sampling.
 * If set and supported by the model, calls will generate deterministic results.
 *
 * @param maxRetries - Maximum number of retries. Set to 0 to disable retries. Default: 2.
 * @param abortSignal - An optional abort signal that can be used to cancel the call.
 * @param timeout - An optional timeout in milliseconds. The call will be aborted if it takes longer than the specified timeout.
 * @param headers - Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
 *
 * @param onChunk - Callback that is called for each chunk of the stream. The stream processing will pause until the callback promise is resolved.
 * @param onError - Callback that is called when an error occurs during streaming. You can use it to log errors.
 * @param onStepFinish - Callback that is called when each step (LLM call) is finished, including intermediate steps.
 * @param onFinish - Callback that is called when all steps are finished and the response is complete.
 *
 * @returns
 * A result object for accessing different stream types and additional information.
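 *
 * @example
 * // Illustrative usage added to this listing (not part of the upstream JSDoc).
 * // Assumes the string model-id form resolved via the default gateway; the id
 * // and prompt are placeholders.
 * const result = streamText({
 *   model: 'openai/gpt-4o-mini',
 *   prompt: 'Invent a new holiday and describe its traditions.',
 * });
 *
 * for await (const textPart of result.textStream) {
 *   process.stdout.write(textPart);
 * }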
 */
export function streamText<
  TOOLS extends ToolSet,
  OUTPUT extends Output = Output<string, string, never>,
>({
  model,
  tools,
  toolChoice,
  system,
  prompt,
  messages,
  maxRetries,
  abortSignal,
  timeout,
  headers,
  stopWhen = stepCountIs(1),
  experimental_output,
  output = experimental_output,
  experimental_telemetry: telemetry,
  prepareStep,
  providerOptions,
  experimental_activeTools,
  activeTools = experimental_activeTools,
  experimental_repairToolCall: repairToolCall,
  experimental_transform: transform,
  experimental_download: download,
  includeRawChunks = false,
  onChunk,
  onError = ({ error }) => {
    console.error(error);
  },
  onFinish,
  onAbort,
  onStepFinish,
  experimental_context,
  experimental_include: include,
  _internal: { now = originalNow, generateId = originalGenerateId } = {},
  ...settings
}: CallSettings &
  Prompt & {
    /**
     * The language model to use.
     */
    model: LanguageModel;

    /**
     * The tools that the model can call. The model needs to support calling tools.
     */
    tools?: TOOLS;

    /**
     * The tool choice strategy. Default: 'auto'.
     */
    toolChoice?: ToolChoice<TOOLS>;

    /**
     * Condition for stopping the generation when there are tool results in the last step.
     * When the condition is an array, any of the conditions can be met to stop the generation.
     *
     * @default stepCountIs(1)
     */
    stopWhen?:
      | StopCondition<NoInfer<TOOLS>>
      | Array<StopCondition<NoInfer<TOOLS>>>;

    /**
     * Optional telemetry configuration (experimental).
     */
    experimental_telemetry?: TelemetrySettings;

    /**
     * Additional provider-specific options. They are passed through
     * to the provider from the AI SDK and enable provider-specific
     * functionality that can be fully encapsulated in the provider.
     */
    providerOptions?: ProviderOptions;

    /**
     * @deprecated Use `activeTools` instead.
     */
    experimental_activeTools?: Array<keyof NoInfer<TOOLS>>;

    /**
     * Limits the tools that are available for the model to call without
     * changing the tool call and result types in the result.
     */
    activeTools?: Array<keyof NoInfer<TOOLS>>;

    /**
     * Optional specification for parsing structured outputs from the LLM response.
     */
    output?: OUTPUT;

    /**
     * Optional specification for parsing structured outputs from the LLM response.
     *
     * @deprecated Use `output` instead.
     */
    experimental_output?: OUTPUT;

    /**
     * Optional function that you can use to provide different settings for a step.
     *
     * @param options - The options for the step.
     * @param options.steps - The steps that have been executed so far.
     * @param options.stepNumber - The number of the step that is being executed.
     * @param options.model - The model that is being used.
     *
     * @returns An object that contains the settings for the step.
     * If you return undefined (or for undefined settings), the settings from the outer level will be used.
     */
    prepareStep?: PrepareStepFunction<NoInfer<TOOLS>>;

    /**
     * A function that attempts to repair a tool call that failed to parse.
     */
    experimental_repairToolCall?: ToolCallRepairFunction<TOOLS>;

    /**
     * Optional stream transformations.
     * They are applied in the order they are provided.
     * The stream transformations must maintain the stream structure for streamText to work correctly.
     */
    experimental_transform?:
      | StreamTextTransform<TOOLS>
      | Array<StreamTextTransform<TOOLS>>;

    /**
     * Custom download function to use for URLs.
     *
     * By default, files are downloaded if the model does not support the URL for the given media type.
     */
    experimental_download?: DownloadFunction | undefined;

    /**
     * Whether to include raw chunks from the provider in the stream.
     * When enabled, you will receive raw chunks with type 'raw' that contain the unprocessed data from the provider.
     * This allows access to cutting-edge provider features not yet wrapped by the AI SDK.
     * Defaults to false.
     */
    includeRawChunks?: boolean;

    /**
     * Callback that is called for each chunk of the stream.
     * The stream processing will pause until the callback promise is resolved.
     */
    onChunk?: StreamTextOnChunkCallback<TOOLS>;

    /**
     * Callback that is invoked when an error occurs during streaming.
     * You can use it to log errors.
     * The stream processing will pause until the callback promise is resolved.
     */
    onError?: StreamTextOnErrorCallback;

    /**
     * Callback that is called when the LLM response and all request tool executions
     * (for tools that have an `execute` function) are finished.
     *
     * The usage is the combined usage of all steps.
     */
    onFinish?: StreamTextOnFinishCallback<TOOLS>;

    onAbort?: StreamTextOnAbortCallback<TOOLS>;

    /**
     * Callback that is called when each step (LLM call) is finished, including intermediate steps.
     */
    onStepFinish?: StreamTextOnStepFinishCallback<TOOLS>;

    /**
     * Context that is passed into tool execution.
     *
     * Experimental (can break in patch releases).
     *
     * @default undefined
     */
    experimental_context?: unknown;

    /**
     * Settings for controlling what data is included in step results.
     * Disabling inclusion can help reduce memory usage when processing
     * large payloads like images.
     *
     * By default, all data is included for backwards compatibility.
     */
    experimental_include?: {
      /**
       * Whether to retain the request body in step results.
       * The request body can be large when sending images or files.
       * @default true
       */
      requestBody?: boolean;
    };

    /**
     * Internal. For test use only. May change without notice.
     */
    _internal?: {
      now?: () => number;
      generateId?: IdGenerator;
    };
  }): StreamTextResult<TOOLS, OUTPUT> {
  const totalTimeoutMs = getTotalTimeoutMs(timeout);
  const stepTimeoutMs = getStepTimeoutMs(timeout);
  const chunkTimeoutMs = getChunkTimeoutMs(timeout);

  const stepAbortController =
    stepTimeoutMs != null ? new AbortController() : undefined;
  const chunkAbortController =
    chunkTimeoutMs != null ? new AbortController() : undefined;

  return new DefaultStreamTextResult<TOOLS, OUTPUT>({
    model: resolveLanguageModel(model),
    telemetry,
    headers,
    settings,
    maxRetries,
    abortSignal: mergeAbortSignals(
      abortSignal,
      totalTimeoutMs != null ? AbortSignal.timeout(totalTimeoutMs) : undefined,
      stepAbortController?.signal,
      chunkAbortController?.signal,
    ),
    stepTimeoutMs,
    stepAbortController,
    chunkTimeoutMs,
    chunkAbortController,
    system,
    prompt,
    messages,
    tools,
    toolChoice,
    transforms: asArray(transform),
    activeTools,
    repairToolCall,
    stopConditions: asArray(stopWhen),
    output,
    providerOptions,
    prepareStep,
    includeRawChunks,
    onChunk,
    onError,
    onFinish,
    onAbort,
    onStepFinish,
    now,
    generateId,
    experimental_context,
    download,
    include,
  });
}
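// Illustrative sketch (not part of the upstream source): multi-step tool
// calling with streamText. `myTools` stands in for a ToolSet defined by the
// caller, and the model id is a placeholder that assumes the string model-id
// form. `stopWhen: stepCountIs(3)` allows up to three LLM steps, with tool
// calls executed between steps.
async function exampleMultiStepToolUse(myTools: ToolSet) {
  const result = streamText({
    model: 'openai/gpt-4o-mini',
    tools: myTools,
    stopWhen: stepCountIs(3),
    prompt: 'Check the weather in Berlin and suggest an activity.',
    onStepFinish: step => {
      console.log('step finished:', step.finishReason);
    },
  });

  for await (const part of result.fullStream) {
    if (part.type === 'tool-call') {
      console.log('tool call:', part.toolName);
    }
  }
}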
export type EnrichedStreamPart<TOOLS extends ToolSet, PARTIAL_OUTPUT> = {
  part: TextStreamPart<TOOLS>;
  partialOutput: PARTIAL_OUTPUT | undefined;
};

function createOutputTransformStream<
  TOOLS extends ToolSet,
  OUTPUT extends Output,
>(
  output: OUTPUT,
): TransformStream<
  TextStreamPart<TOOLS>,
  EnrichedStreamPart<TOOLS, InferPartialOutput<OUTPUT>>
> {
  let firstTextChunkId: string | undefined = undefined;
  let text = '';
  let textChunk = '';
  let textProviderMetadata: ProviderMetadata | undefined = undefined;
  let lastPublishedJson = '';

  function publishTextChunk({
    controller,
    partialOutput = undefined,
  }: {
    controller: TransformStreamDefaultController<
      EnrichedStreamPart<TOOLS, InferPartialOutput<OUTPUT>>
    >;
    partialOutput?: InferPartialOutput<OUTPUT>;
  }) {
    controller.enqueue({
      part: {
        type: 'text-delta',
        id: firstTextChunkId!,
        text: textChunk,
        providerMetadata: textProviderMetadata,
      },
      partialOutput,
    });
    textChunk = '';
  }

  return new TransformStream<
    TextStreamPart<TOOLS>,
    EnrichedStreamPart<TOOLS, InferPartialOutput<OUTPUT>>
  >({
    async transform(chunk, controller) {
      // ensure that we publish the last text chunk before the step finish:
      if (chunk.type === 'finish-step' && textChunk.length > 0) {
        publishTextChunk({ controller });
      }

      if (
        chunk.type !== 'text-delta' &&
        chunk.type !== 'text-start' &&
        chunk.type !== 'text-end'
      ) {
        controller.enqueue({ part: chunk, partialOutput: undefined });
        return;
      }

      // we have to pick a text chunk which contains the json text
      // since we are streaming, we have to pick the first text chunk
      if (firstTextChunkId == null) {
        firstTextChunkId = chunk.id;
      } else if (chunk.id !== firstTextChunkId) {
        controller.enqueue({ part: chunk, partialOutput: undefined });
        return;
      }

      if (chunk.type === 'text-start') {
        controller.enqueue({ part: chunk, partialOutput: undefined });
        return;
      }

      if (chunk.type === 'text-end') {
        if (textChunk.length > 0) {
          publishTextChunk({ controller });
        }
        controller.enqueue({ part: chunk, partialOutput: undefined });
        return;
      }

      text += chunk.text;
      textChunk += chunk.text;
      textProviderMetadata = chunk.providerMetadata ?? textProviderMetadata;

      // only publish if partial json can be parsed:
      const result = await output.parsePartialOutput({ text });

      // null should be allowed (valid JSON value) but undefined should not:
      if (result !== undefined) {
        // only send new json if it has changed:
        const currentJson = JSON.stringify(result.partial);
        if (currentJson !== lastPublishedJson) {
          publishTextChunk({ controller, partialOutput: result.partial });
          lastPublishedJson = currentJson;
        }
      }
    },
  });
}

class DefaultStreamTextResult<TOOLS extends ToolSet, OUTPUT extends Output>
  implements StreamTextResult<TOOLS, OUTPUT>
{
  private readonly _totalUsage = new DelayedPromise<
    Awaited<StreamTextResult<TOOLS, OUTPUT>['usage']>
  >();
  private readonly _finishReason = new DelayedPromise<
    Awaited<StreamTextResult<TOOLS, OUTPUT>['finishReason']>
  >();
  private readonly _rawFinishReason = new DelayedPromise<
    Awaited<StreamTextResult<TOOLS, OUTPUT>['rawFinishReason']>
  >();
  private readonly _steps = new DelayedPromise<
    Awaited<StreamTextResult<TOOLS, OUTPUT>['steps']>
  >();

  private readonly addStream: (
    stream: ReadableStream<TextStreamPart<TOOLS>>,
  ) => void;
  private readonly closeStream: () => void;
  private baseStream: ReadableStream<
    EnrichedStreamPart<TOOLS, InferPartialOutput<OUTPUT>>
  >;

  private outputSpecification: OUTPUT | undefined;
  private includeRawChunks: boolean;
  private tools: TOOLS | undefined;

  constructor({
    model,
    telemetry,
    headers,
    settings,
    maxRetries: maxRetriesArg,
    abortSignal,
    stepTimeoutMs,
    stepAbortController,
    chunkTimeoutMs,
    chunkAbortController,
    system,
    prompt,
    messages,
    tools,
    toolChoice,
    transforms,
    activeTools,
    repairToolCall,
    stopConditions,
    output,
    providerOptions,
    prepareStep,
    includeRawChunks,
    now,
    generateId,
    onChunk,
    onError,
    onFinish,
    onAbort,
    onStepFinish,
    experimental_context,
    download,
    include,
  }: {
    model: LanguageModelV3;
    telemetry: TelemetrySettings | undefined;
    headers: Record<string, string | undefined> | undefined;
    settings: Omit<CallSettings, 'abortSignal' | 'headers'>;
    maxRetries: number | undefined;
    abortSignal: AbortSignal | undefined;
    stepTimeoutMs: number | undefined;
    stepAbortController: AbortController | undefined;
    chunkTimeoutMs: number | undefined;
    chunkAbortController: AbortController | undefined;
    system: Prompt['system'];
    prompt: Prompt['prompt'];
    messages: Prompt['messages'];
    tools: TOOLS | undefined;
    toolChoice: ToolChoice<TOOLS> | undefined;
    transforms: Array<StreamTextTransform<TOOLS>>;
    activeTools: Array<keyof TOOLS> | undefined;
    repairToolCall: ToolCallRepairFunction<TOOLS> | undefined;
    stopConditions: Array<StopCondition<NoInfer<TOOLS>>>;
    output: OUTPUT | undefined;
    providerOptions: ProviderOptions | undefined;
    prepareStep: PrepareStepFunction<NoInfer<TOOLS>> | undefined;
    includeRawChunks: boolean;
    now: () => number;
    generateId: () => string;
    experimental_context: unknown;
    download: DownloadFunction | undefined;
    include: { requestBody?: boolean } | undefined;

    // callbacks:
    onChunk: undefined | StreamTextOnChunkCallback<TOOLS>;
    onError: StreamTextOnErrorCallback;
    onFinish: undefined | StreamTextOnFinishCallback<TOOLS>;
    onAbort: undefined | StreamTextOnAbortCallback<TOOLS>;
    onStepFinish: undefined | StreamTextOnStepFinishCallback<TOOLS>;
  }) {
    this.outputSpecification = output;
    this.includeRawChunks = includeRawChunks;
    this.tools = tools;

    // promise to ensure that the step has been fully processed by the event processor
    // before a new step is started. This is required because the continuation condition
    // needs the updated steps to determine if another step is needed.
    let stepFinish!: DelayedPromise<void>;

    let recordedContent: Array<ContentPart<TOOLS>> = [];
    const recordedResponseMessages: Array<ResponseMessage> = [];
    let recordedFinishReason: FinishReason | undefined = undefined;
    let recordedRawFinishReason: string | undefined = undefined;
    let recordedTotalUsage: LanguageModelUsage | undefined = undefined;
    let recordedRequest: LanguageModelRequestMetadata = {};
    let recordedWarnings: Array<CallWarning> = [];
    const recordedSteps: StepResult<TOOLS>[] = [];

    // Track provider-executed tool calls that support deferred results
    // (e.g., code_execution in programmatic tool calling scenarios).
    // These tools may not return their results in the same turn as their call.
    const pendingDeferredToolCalls = new Map<string, { toolName: string }>();

    let rootSpan!: Span;

    let activeTextContent: Record<
      string,
      {
        type: 'text';
        text: string;
        providerMetadata: ProviderMetadata | undefined;
      }
    > = {};

    let activeReasoningContent: Record<
      string,
      {
        type: 'reasoning';
        text: string;
        providerMetadata: ProviderMetadata | undefined;
      }
    > = {};

    const eventProcessor = new TransformStream<
      EnrichedStreamPart<TOOLS, InferPartialOutput<OUTPUT>>,
      EnrichedStreamPart<TOOLS, InferPartialOutput<OUTPUT>>
    >({
      async transform(chunk, controller) {
        controller.enqueue(chunk); // forward the chunk to the next stream

        const { part } = chunk;

        if (
          part.type === 'text-delta' ||
          part.type === 'reasoning-delta' ||
          part.type === 'source' ||
          part.type === 'tool-call' ||
          part.type === 'tool-result' ||
          part.type === 'tool-input-start' ||
          part.type === 'tool-input-delta' ||
          part.type === 'raw'
        ) {
          await onChunk?.({ chunk: part });
        }

        if (part.type === 'error') {
          await onError({ error: wrapGatewayError(part.error) });
        }

        if (part.type === 'text-start') {
          activeTextContent[part.id] = {
            type: 'text',
            text: '',
            providerMetadata: part.providerMetadata,
          };
          recordedContent.push(activeTextContent[part.id]);
        }

        if (part.type === 'text-delta') {
          const activeText = activeTextContent[part.id];

          if (activeText == null) {
            controller.enqueue({
              part: {
                type: 'error',
                error: `text part ${part.id} not found`,
              },
              partialOutput: undefined,
            });
            return;
          }

          activeText.text += part.text;
          activeText.providerMetadata =
            part.providerMetadata ?? activeText.providerMetadata;
        }

        if (part.type === 'text-end') {
          const activeText = activeTextContent[part.id];

          if (activeText == null) {
            controller.enqueue({
              part: {
                type: 'error',
                error: `text part ${part.id} not found`,
              },
              partialOutput: undefined,
            });
            return;
          }

          activeText.providerMetadata =
            part.providerMetadata ?? activeText.providerMetadata;

          delete activeTextContent[part.id];
        }

        if (part.type === 'reasoning-start') {
          activeReasoningContent[part.id] = {
            type: 'reasoning',
            text: '',
            providerMetadata: part.providerMetadata,
          };
          recordedContent.push(activeReasoningContent[part.id]);
        }

        if (part.type === 'reasoning-delta') {
          const activeReasoning = activeReasoningContent[part.id];

          if (activeReasoning == null) {
            controller.enqueue({
              part: {
                type: 'error',
                error: `reasoning part ${part.id} not found`,
              },
              partialOutput: undefined,
            });
            return;
          }

          activeReasoning.text += part.text;
          activeReasoning.providerMetadata =
            part.providerMetadata ?? activeReasoning.providerMetadata;
        }

        if (part.type === 'reasoning-end') {
          const activeReasoning = activeReasoningContent[part.id];

          if (activeReasoning == null) {
            controller.enqueue({
              part: {
                type: 'error',
                error: `reasoning part ${part.id} not found`,
              },
              partialOutput: undefined,
            });
            return;
          }

          activeReasoning.providerMetadata =
            part.providerMetadata ?? activeReasoning.providerMetadata;

          delete activeReasoningContent[part.id];
        }

        if (part.type === 'file') {
          recordedContent.push({ type: 'file', file: part.file });
        }

        if (part.type === 'source') {
          recordedContent.push(part);
        }

        if (part.type === 'tool-call') {
          recordedContent.push(part);
        }

        if (part.type === 'tool-result' && !part.preliminary) {
          recordedContent.push(part);
        }

        if (part.type === 'tool-approval-request') {
          recordedContent.push(part);
        }

        if (part.type === 'tool-error') {
          recordedContent.push(part);
        }

        if (part.type === 'start-step') {
          // reset the recorded data when a new step starts:
          recordedContent = [];
          activeReasoningContent = {};
          activeTextContent = {};

          recordedRequest = part.request;
          recordedWarnings = part.warnings;
        }

        if (part.type === 'finish-step') {
          const stepMessages = await toResponseMessages({
            content: recordedContent,
            tools,
          });

          // Add step information (after response messages are updated):
          const currentStepResult: StepResult<TOOLS> = new DefaultStepResult({
            content: recordedContent,
            finishReason: part.finishReason,
            rawFinishReason: part.rawFinishReason,
            usage: part.usage,
            warnings: recordedWarnings,
            request: recordedRequest,
            response: {
              ...part.response,
              messages: [...recordedResponseMessages, ...stepMessages],
            },
            providerMetadata: part.providerMetadata,
          });

          await onStepFinish?.(currentStepResult);

          logWarnings({
            warnings: recordedWarnings,
            provider: model.provider,
            model: model.modelId,
          });

          recordedSteps.push(currentStepResult);
          recordedResponseMessages.push(...stepMessages);

          // resolve the promise to signal that the step has been fully processed
          // by the event processor:
          stepFinish.resolve();
        }

        if (part.type === 'finish') {
          recordedTotalUsage = part.totalUsage;
          recordedFinishReason = part.finishReason;
          recordedRawFinishReason = part.rawFinishReason;
        }
      },

      async flush(controller) {
        try {
          if (recordedSteps.length === 0) {
            const error = abortSignal?.aborted
              ? abortSignal.reason
              : new NoOutputGeneratedError({
                  message: 'No output generated. Check the stream for errors.',
                });

            self._finishReason.reject(error);
            self._rawFinishReason.reject(error);
            self._totalUsage.reject(error);
            self._steps.reject(error);

            return; // no steps recorded (e.g. in error scenario)
          }

          // derived:
          const finishReason = recordedFinishReason ?? 'other';
          const totalUsage =
            recordedTotalUsage ?? createNullLanguageModelUsage();

          // from finish:
          self._finishReason.resolve(finishReason);
          self._rawFinishReason.resolve(recordedRawFinishReason);
          self._totalUsage.resolve(totalUsage);

          // aggregate results:
          self._steps.resolve(recordedSteps);

          // call onFinish callback:
          const finalStep = recordedSteps[recordedSteps.length - 1];

          await onFinish?.({
            finishReason: finalStep.finishReason,
            rawFinishReason: finalStep.rawFinishReason,
            totalUsage,
            usage: finalStep.usage,
            content: finalStep.content,
            text: finalStep.text,
            reasoningText: finalStep.reasoningText,
            reasoning: finalStep.reasoning,
            files: finalStep.files,
            sources: finalStep.sources,
            toolCalls: finalStep.toolCalls,
            staticToolCalls: finalStep.staticToolCalls,
            dynamicToolCalls: finalStep.dynamicToolCalls,
            toolResults: finalStep.toolResults,
            staticToolResults: finalStep.staticToolResults,
            dynamicToolResults: finalStep.dynamicToolResults,
            request: finalStep.request,
            response: finalStep.response,
            warnings: finalStep.warnings,
            providerMetadata: finalStep.providerMetadata,
            steps: recordedSteps,
            experimental_context,
          });

          // Add response information to the root span:
          rootSpan.setAttributes(
            await selectTelemetryAttributes({
              telemetry,
              attributes: {
                'ai.response.finishReason': finishReason,
                'ai.response.text': { output: () => finalStep.text },
                'ai.response.reasoning': {
                  output: () => finalStep.reasoningText,
                },
                'ai.response.toolCalls': {
                  output: () =>
                    finalStep.toolCalls?.length
                      ? JSON.stringify(finalStep.toolCalls)
                      : undefined,
                },
                'ai.response.providerMetadata': JSON.stringify(
                  finalStep.providerMetadata,
                ),

                'ai.usage.inputTokens': totalUsage.inputTokens,
                'ai.usage.outputTokens': totalUsage.outputTokens,
                'ai.usage.totalTokens': totalUsage.totalTokens,
                'ai.usage.reasoningTokens': totalUsage.reasoningTokens,
                'ai.usage.cachedInputTokens': totalUsage.cachedInputTokens,
              },
            }),
          );
        } catch (error) {
          controller.error(error);
        } finally {
          rootSpan.end();
        }
      },
    });

    // initialize the stitchable stream and the transformed stream:
    const stitchableStream = createStitchableStream<TextStreamPart<TOOLS>>();
    this.addStream = stitchableStream.addStream;
    this.closeStream = stitchableStream.close;

    // resilient stream that handles abort signals and errors:
    const reader = stitchableStream.stream.getReader();
    let stream = new ReadableStream<TextStreamPart<TOOLS>>({
      async start(controller) {
        // send start event:
        controller.enqueue({ type: 'start' });
      },

      async pull(controller) {
        // abort handling:
        function abort() {
          onAbort?.({ steps: recordedSteps });

          controller.enqueue({
            type: 'abort',
            // The `reason` is usually of type DOMException, but it can also be of any type,
            // so we use getErrorMessage for serialization because it is already designed to accept values of the unknown type.
            // See: https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal/reason
            ...(abortSignal?.reason !== undefined
              ? { reason: getErrorMessage(abortSignal.reason) }
              : {}),
          });

          controller.close();
        }

        try {
          const { done, value } = await reader.read();

          if (done) {
            controller.close();
            return;
          }

          if (abortSignal?.aborted) {
            abort();
            return;
          }

          controller.enqueue(value);
        } catch (error) {
          if (isAbortError(error) && abortSignal?.aborted) {
            abort();
          } else {
            controller.error(error);
          }
        }
      },

      cancel(reason) {
        return stitchableStream.stream.cancel(reason);
      },
    });

    // transform the stream before output parsing
    // to enable replacement of stream segments:
    for (const transform of transforms) {
      stream = stream.pipeThrough(
        transform({
          tools: tools as TOOLS,
          stopStream() {
            stitchableStream.terminate();
          },
        }),
      );
    }

    this.baseStream = stream
      .pipeThrough(createOutputTransformStream(output ?? text()))
      .pipeThrough(eventProcessor);

    const { maxRetries, retry } = prepareRetries({
      maxRetries: maxRetriesArg,
      abortSignal,
    });

    const tracer = getTracer(telemetry);

    const callSettings = prepareCallSettings(settings);

    const baseTelemetryAttributes = getBaseTelemetryAttributes({
      model,
      telemetry,
      headers,
      settings: { ...callSettings, maxRetries },
    });

    const self = this;

    recordSpan({
      name: 'ai.streamText',
      attributes: selectTelemetryAttributes({
        telemetry,
        attributes: {
          ...assembleOperationName({ operationId: 'ai.streamText', telemetry }),
          ...baseTelemetryAttributes,
          // specific settings that only make sense on the outer level:
          'ai.prompt': {
            input: () => JSON.stringify({ system, prompt, messages }),
          },
        },
      }),
      tracer,
      endWhenDone: false,
      fn: async rootSpanArg => {
        rootSpan = rootSpanArg;

        const initialPrompt = await standardizePrompt({
          system,
          prompt,
          messages,
        } as Prompt);

        const initialMessages = initialPrompt.messages;
        const initialResponseMessages: Array<ResponseMessage> = [];

        const { approvedToolApprovals, deniedToolApprovals } =
          collectToolApprovals<TOOLS>({ messages: initialMessages });

        // initial tool execution step stream
        if (
          deniedToolApprovals.length > 0 ||
          approvedToolApprovals.length > 0
        ) {
          const providerExecutedToolApprovals = [
            ...approvedToolApprovals,
            ...deniedToolApprovals,
          ].filter(toolApproval => toolApproval.toolCall.providerExecuted);

          const localApprovedToolApprovals = approvedToolApprovals.filter(
            toolApproval => !toolApproval.toolCall.providerExecuted,
          );

          const localDeniedToolApprovals = deniedToolApprovals.filter(
            toolApproval => !toolApproval.toolCall.providerExecuted,
          );

          const deniedProviderExecutedToolApprovals =
            deniedToolApprovals.filter(
              toolApproval => toolApproval.toolCall.providerExecuted,
            );

          let toolExecutionStepStreamController:
            | ReadableStreamDefaultController<TextStreamPart<TOOLS>>
            | undefined;

          const toolExecutionStepStream = new ReadableStream<
            TextStreamPart<TOOLS>
          >({
            start(controller) {
              toolExecutionStepStreamController = controller;
            },
          });

          self.addStream(toolExecutionStepStream);

          try {
            for (const toolApproval of [
              ...localDeniedToolApprovals,
              ...deniedProviderExecutedToolApprovals,
            ]) {
              toolExecutionStepStreamController?.enqueue({
                type: 'tool-output-denied',
                toolCallId: toolApproval.toolCall.toolCallId,
                toolName: toolApproval.toolCall.toolName,
              } as StaticToolOutputDenied<TOOLS>);
            }

            const toolOutputs: Array<ToolOutput<TOOLS>> = [];

            await Promise.all(
              localApprovedToolApprovals.map(async toolApproval => {
                const result = await executeToolCall({
                  toolCall: toolApproval.toolCall,
                  tools,
                  tracer,
                  telemetry,
                  messages: initialMessages,
                  abortSignal,
                  experimental_context,
                  onPreliminaryToolResult: result => {
                    toolExecutionStepStreamController?.enqueue(result);
                  },
                });

                if (result != null) {
                  toolExecutionStepStreamController?.enqueue(result);
                  toolOutputs.push(result);
                }
              }),
            );

            // forward provider-executed approval responses to the provider (do not execute locally):
            if (providerExecutedToolApprovals.length > 0) {
              initialResponseMessages.push({
                role: 'tool',
                content: providerExecutedToolApprovals.map(
                  toolApproval =>
                    ({
                      type: 'tool-approval-response',
                      approvalId: toolApproval.approvalResponse.approvalId,
                      approved: toolApproval.approvalResponse.approved,
                      reason: toolApproval.approvalResponse.reason,
                      providerExecuted: true,
                    }) satisfies ToolApprovalResponse,
                ),
              });
            }

            // Local tool results (approved + denied) are sent as tool results:
            if (toolOutputs.length > 0 || localDeniedToolApprovals.length > 0) {
              const localToolContent: ToolContent = [];

              // add regular tool results for approved tool calls:
              for (const output of toolOutputs) {
                localToolContent.push({
                  type: 'tool-result' as const,
                  toolCallId: output.toolCallId,
                  toolName: output.toolName,
                  output: await createToolModelOutput({
                    toolCallId: output.toolCallId,
                    input: output.input,
                    tool: tools?.[output.toolName],
                    output:
                      output.type === 'tool-result'
                        ? output.output
                        : output.error,
                    errorMode: output.type === 'tool-error' ? 'json' : 'none',
                  }),
                });
              }

              // add execution denied tool results for denied local tool approvals:
              for (const toolApproval of localDeniedToolApprovals) {
                localToolContent.push({
                  type: 'tool-result' as const,
                  toolCallId: toolApproval.toolCall.toolCallId,
                  toolName: toolApproval.toolCall.toolName,
                  output: {
                    type: 'execution-denied' as const,
                    reason: toolApproval.approvalResponse.reason,
                  },
                });
              }

              initialResponseMessages.push({
                role: 'tool',
                content: localToolContent,
              });
            }
          } finally {
            toolExecutionStepStreamController?.close();
          }
        }

        recordedResponseMessages.push(...initialResponseMessages);

        async function streamStep({
          currentStep,
          responseMessages,
          usage,
        }: {
          currentStep: number;
          responseMessages: Array<ResponseMessage>;
          usage: LanguageModelUsage;
        }) {
          const includeRawChunks = self.includeRawChunks;

          // Set up step timeout if configured
          const stepTimeoutId =
            stepTimeoutMs != null
              ? setTimeout(() => stepAbortController!.abort(), stepTimeoutMs)
              : undefined;

          // Set up chunk timeout tracking (will be reset on each chunk)
          let chunkTimeoutId: ReturnType<typeof setTimeout> | undefined =
            undefined;

          function resetChunkTimeout() {
            if (chunkTimeoutMs != null) {
              if (chunkTimeoutId != null) {
                clearTimeout(chunkTimeoutId);
              }
              chunkTimeoutId = setTimeout(
                () => chunkAbortController!.abort(),
                chunkTimeoutMs,
              );
            }
          }

          function clearChunkTimeout() {
            if (chunkTimeoutId != null) {
              clearTimeout(chunkTimeoutId);
              chunkTimeoutId = undefined;
            }
          }

          function clearStepTimeout() {
            if (stepTimeoutId != null) {
              clearTimeout(stepTimeoutId);
            }
          }

          try {
            stepFinish = new DelayedPromise<void>();

            const stepInputMessages = [...initialMessages, ...responseMessages];

            const prepareStepResult = await prepareStep?.({
              model,
              steps: recordedSteps,
              stepNumber: recordedSteps.length,
              messages: stepInputMessages,
              experimental_context,
            });

            const stepModel = resolveLanguageModel(
              prepareStepResult?.model ?? model,
            );

            const promptMessages = await convertToLanguageModelPrompt({
              prompt: {
                system: prepareStepResult?.system ?? initialPrompt.system,
                messages: prepareStepResult?.messages ?? stepInputMessages,
              },
              supportedUrls: await stepModel.supportedUrls,
              download,
            });

            const { toolChoice: stepToolChoice, tools: stepTools } =
              await prepareToolsAndToolChoice({
                tools,
                toolChoice: prepareStepResult?.toolChoice ?? toolChoice,
                activeTools: prepareStepResult?.activeTools ?? activeTools,
              });

            experimental_context =
              prepareStepResult?.experimental_context ?? experimental_context;

            const stepProviderOptions = mergeObjects(
              providerOptions,
              prepareStepResult?.providerOptions,
            );

            const {
              result: { stream, response, request },
              doStreamSpan,
              startTimestampMs,
            } = await retry(() =>
              recordSpan({
                name: 'ai.streamText.doStream',
                attributes: selectTelemetryAttributes({
                  telemetry,
                  attributes: {
                    ...assembleOperationName({
                      operationId: 'ai.streamText.doStream',
                      telemetry,
                    }),
                    ...baseTelemetryAttributes,

                    // model:
                    'ai.model.provider': stepModel.provider,
                    'ai.model.id': stepModel.modelId,

                    // prompt:
                    'ai.prompt.messages': {
                      input: () => stringifyForTelemetry(promptMessages),
                    },
                    'ai.prompt.tools': {
                      // convert the language model level tools:
                      input: () => stepTools?.map(tool => JSON.stringify(tool)),
                    },
                    'ai.prompt.toolChoice': {
                      input: () =>
                        stepToolChoice != null
                          ? JSON.stringify(stepToolChoice)
                          : undefined,
                    },

                    // standardized gen-ai llm span attributes:
                    'gen_ai.system': stepModel.provider,
                    'gen_ai.request.model': stepModel.modelId,
                    'gen_ai.request.frequency_penalty':
                      callSettings.frequencyPenalty,
                    'gen_ai.request.max_tokens': callSettings.maxOutputTokens,
                    'gen_ai.request.presence_penalty':
                      callSettings.presencePenalty,
                    'gen_ai.request.stop_sequences': callSettings.stopSequences,
                    'gen_ai.request.temperature': callSettings.temperature,
                    'gen_ai.request.top_k': callSettings.topK,
                    'gen_ai.request.top_p': callSettings.topP,
                  },
                }),
                tracer,
                endWhenDone: false,
                fn: async doStreamSpan => ({
                  startTimestampMs: now(), // get before the call
                  doStreamSpan,
                  result: await stepModel.doStream({
                    ...callSettings,
                    tools: stepTools,
                    toolChoice: stepToolChoice,
                    responseFormat: await output?.responseFormat,
                    prompt: promptMessages,
                    providerOptions: stepProviderOptions,
                    abortSignal,
                    headers,
                    includeRawChunks,
                  }),
                }),
              }),
            );

            const streamWithToolResults = runToolsTransformation({
              tools,
              generatorStream: stream,
              tracer,
              telemetry,
              system,
              messages: stepInputMessages,
              repairToolCall,
              abortSignal,
              experimental_context,
              generateId,
            });

            // Conditionally include request.body based on include settings.
            // Large payloads (e.g., base64-encoded images) can cause memory issues.
            const stepRequest: LanguageModelRequestMetadata = (include?.requestBody ?? true)