@tanstack/ai
Version:
Core TanStack AI library - Open source AI SDK
1,261 lines (1,168 loc) • 37.2 kB
text/typescript
import type { StandardJSONSchemaV1 } from '@standard-schema/spec'
/**
* Tool call states - track the lifecycle of a tool call
*/
export type ToolCallState =
| 'awaiting-input' // Received start but no arguments yet
| 'input-streaming' // Partial arguments received
| 'input-complete' // All arguments received
| 'approval-requested' // Waiting for user approval
| 'approval-responded' // User has approved/denied
/**
* Tool result states - track the lifecycle of a tool result
*/
export type ToolResultState =
| 'streaming' // Placeholder for future streamed output
| 'complete' // Result is complete
| 'error' // Error occurred
/**
* JSON Schema type for defining tool input/output schemas as raw JSON Schema objects.
* This allows tools to be defined without schema libraries when you have JSON Schema definitions available.
*/
export interface JSONSchema {
type?: string | Array<string>
properties?: Record<string, JSONSchema>
items?: JSONSchema | Array<JSONSchema>
required?: Array<string>
enum?: Array<unknown>
const?: unknown
description?: string
default?: unknown
$ref?: string
$defs?: Record<string, JSONSchema>
definitions?: Record<string, JSONSchema>
allOf?: Array<JSONSchema>
anyOf?: Array<JSONSchema>
oneOf?: Array<JSONSchema>
not?: JSONSchema
if?: JSONSchema
then?: JSONSchema
else?: JSONSchema
minimum?: number
maximum?: number
exclusiveMinimum?: number
exclusiveMaximum?: number
minLength?: number
maxLength?: number
pattern?: string
format?: string
minItems?: number
maxItems?: number
uniqueItems?: boolean
additionalProperties?: boolean | JSONSchema
additionalItems?: boolean | JSONSchema
patternProperties?: Record<string, JSONSchema>
propertyNames?: JSONSchema
minProperties?: number
maxProperties?: number
title?: string
examples?: Array<unknown>
[]: any // Allow additional properties for extensibility
}
/**
* Union type for schema input - can be any Standard JSON Schema compliant schema or a plain JSONSchema object.
*
* Standard JSON Schema compliant libraries include:
* - Zod v4.2+ (natively supports StandardJSONSchemaV1)
* - ArkType v2.1.28+ (natively supports StandardJSONSchemaV1)
* - Valibot v1.2+ (via `toStandardJsonSchema()` from `@valibot/to-json-schema`)
*
* @see https://standardschema.dev/json-schema
*/
export type SchemaInput = StandardJSONSchemaV1<any, any> | JSONSchema
/**
* Infer the TypeScript type from a schema.
* For Standard JSON Schema compliant schemas, extracts the input type.
* For plain JSONSchema, returns `any` since we can't infer types from JSON Schema at compile time.
*/
export type InferSchemaType<T> =
T extends StandardJSONSchemaV1<infer TInput, unknown> ? TInput : unknown
export interface ToolCall {
id: string
type: 'function'
function: {
name: string
arguments: string // JSON string
}
/** Provider-specific metadata to carry through the tool call lifecycle */
providerMetadata?: Record<string, unknown>
}
// ============================================================================
// Multimodal Content Types
// ============================================================================
/**
* Supported input modality types for multimodal content.
* - 'text': Plain text content
* - 'image': Image content (base64 or URL)
* - 'audio': Audio content (base64 or URL)
* - 'video': Video content (base64 or URL)
* - 'document': Document content like PDFs (base64 or URL)
*/
export type Modality = 'text' | 'image' | 'audio' | 'video' | 'document'
/**
* Source specification for inline data content (base64).
* Requires a mimeType to ensure providers receive proper content type information.
*/
export interface ContentPartDataSource {
/**
* Indicates this is inline data content.
*/
type: 'data'
/**
* The base64-encoded content value.
*/
value: string
/**
* The MIME type of the content (e.g., 'image/png', 'audio/wav').
* Required for data sources to ensure proper handling by providers.
*/
mimeType: string
}
/**
* Source specification for URL-based content.
* mimeType is optional as it can often be inferred from the URL or response headers.
*/
export interface ContentPartUrlSource {
/**
* Indicates this is URL-referenced content.
*/
type: 'url'
/**
* HTTP(S) URL or data URI pointing to the content.
*/
value: string
/**
* Optional MIME type hint for cases where providers can't infer it from the URL.
*/
mimeType?: string
}
/**
* Source specification for multimodal content.
* Discriminated union supporting both inline data (base64) and URL-based content.
* - For 'data' sources: mimeType is required
* - For 'url' sources: mimeType is optional
*/
export type ContentPartSource = ContentPartDataSource | ContentPartUrlSource
/**
* Image content part for multimodal messages.
* @template TMetadata - Provider-specific metadata type (e.g., OpenAI's detail level)
*/
export interface ImagePart<TMetadata = unknown> {
type: 'image'
/** Source of the image content */
source: ContentPartSource
/** Provider-specific metadata (e.g., OpenAI's detail: 'auto' | 'low' | 'high') */
metadata?: TMetadata
}
/**
* Audio content part for multimodal messages.
* @template TMetadata - Provider-specific metadata type
*/
export interface AudioPart<TMetadata = unknown> {
type: 'audio'
/** Source of the audio content */
source: ContentPartSource
/** Provider-specific metadata (e.g., format, sample rate) */
metadata?: TMetadata
}
/**
* Video content part for multimodal messages.
* @template TMetadata - Provider-specific metadata type
*/
export interface VideoPart<TMetadata = unknown> {
type: 'video'
/** Source of the video content */
source: ContentPartSource
/** Provider-specific metadata (e.g., duration, resolution) */
metadata?: TMetadata
}
/**
* Document content part for multimodal messages (e.g., PDFs).
* @template TMetadata - Provider-specific metadata type (e.g., Anthropic's media_type)
*/
export interface DocumentPart<TMetadata = unknown> {
type: 'document'
/** Source of the document content */
source: ContentPartSource
/** Provider-specific metadata (e.g., media_type for PDFs) */
metadata?: TMetadata
}
/**
* Union type for all multimodal content parts.
* @template TImageMeta - Provider-specific image metadata type
* @template TAudioMeta - Provider-specific audio metadata type
* @template TVideoMeta - Provider-specific video metadata type
* @template TDocumentMeta - Provider-specific document metadata type
*/
export type ContentPart<
TTextMeta = unknown,
TImageMeta = unknown,
TAudioMeta = unknown,
TVideoMeta = unknown,
TDocumentMeta = unknown,
> =
| TextPart<TTextMeta>
| ImagePart<TImageMeta>
| AudioPart<TAudioMeta>
| VideoPart<TVideoMeta>
| DocumentPart<TDocumentMeta>
/**
* Helper type to filter ContentPart union to only include specific modalities.
* Used to constrain message content based on model capabilities.
*/
export type ContentPartForInputModalitiesTypes<
TInputModalitiesTypes extends InputModalitiesTypes,
> = Extract<
ContentPart<
TInputModalitiesTypes['messageMetadataByModality']['text'],
TInputModalitiesTypes['messageMetadataByModality']['image'],
TInputModalitiesTypes['messageMetadataByModality']['audio'],
TInputModalitiesTypes['messageMetadataByModality']['video'],
TInputModalitiesTypes['messageMetadataByModality']['document']
>,
{ type: TInputModalitiesTypes['inputModalities'][number] }
>
/**
* Helper type to convert a readonly array of modalities to a union type.
* e.g., readonly ['text', 'image'] -> 'text' | 'image'
*/
export type ModalitiesArrayToUnion<T extends ReadonlyArray<Modality>> =
T[number]
/**
* Type for message content constrained by supported modalities.
* When modalities is ['text', 'image'], only TextPart and ImagePart are allowed in the array.
*/
export type ConstrainedContent<
TInputModalitiesTypes extends InputModalitiesTypes,
> =
| string
| null
| Array<ContentPartForInputModalitiesTypes<TInputModalitiesTypes>>
export interface ModelMessage<
TContent extends string | null | Array<ContentPart> =
| string
| null
| Array<ContentPart>,
> {
role: 'user' | 'assistant' | 'tool'
content: TContent
name?: string
toolCalls?: Array<ToolCall>
toolCallId?: string
}
/**
* Message parts - building blocks of UIMessage
*/
export interface TextPart<TMetadata = unknown> {
type: 'text'
content: string
metadata?: TMetadata
}
export interface ToolCallPart {
type: 'tool-call'
id: string
name: string
arguments: string // JSON string (may be incomplete)
state: ToolCallState
/** Approval metadata if tool requires user approval */
approval?: {
id: string // Unique approval ID
needsApproval: boolean // Always true if present
approved?: boolean // User's decision (undefined until responded)
}
/** Tool execution output (for client tools or after approval) */
output?: any
}
export interface ToolResultPart {
type: 'tool-result'
toolCallId: string
content: string
state: ToolResultState
error?: string // Error message if state is "error"
}
export interface ThinkingPart {
type: 'thinking'
content: string
}
export type MessagePart =
| TextPart
| ImagePart
| AudioPart
| VideoPart
| DocumentPart
| ToolCallPart
| ToolResultPart
| ThinkingPart
/**
* UIMessage - Domain-specific message format optimized for building chat UIs
* Contains parts that can be text, tool calls, or tool results
*/
export interface UIMessage {
id: string
role: 'system' | 'user' | 'assistant'
parts: Array<MessagePart>
createdAt?: Date
}
export type InputModalitiesTypes = {
inputModalities: ReadonlyArray<Modality>
messageMetadataByModality: DefaultMessageMetadataByModality
}
/**
* A ModelMessage with content constrained to only allow content parts
* matching the specified input modalities.
*/
export type ConstrainedModelMessage<
TInputModalitiesTypes extends InputModalitiesTypes,
> = Omit<ModelMessage, 'content'> & {
content: ConstrainedContent<TInputModalitiesTypes>
}
/**
* Context passed to tool execute functions, providing capabilities like
* emitting custom events during execution.
*/
export interface ToolExecutionContext {
/** The ID of the tool call being executed */
toolCallId?: string
/**
* Emit a custom event during tool execution.
* Events are streamed to the client in real-time as AG-UI CUSTOM events.
*
* @param eventName - Name of the custom event
* @param value - Event payload value
*
* @example
* ```ts
* const tool = toolDefinition({ ... }).server(async (args, context) => {
* context?.emitCustomEvent('progress', { step: 1, total: 3 })
* // ... do work ...
* context?.emitCustomEvent('progress', { step: 2, total: 3 })
* // ... do more work ...
* return result
* })
* ```
*/
emitCustomEvent: (eventName: string, value: Record<string, any>) => void
}
/**
* Tool/Function definition for function calling.
*
* Tools allow the model to interact with external systems, APIs, or perform computations.
* The model will decide when to call tools based on the user's request and the tool descriptions.
*
* Tools can use any Standard JSON Schema compliant library (Zod, ArkType, Valibot, etc.)
* or plain JSON Schema objects for runtime validation and type safety.
*
* @see https://platform.openai.com/docs/guides/function-calling
* @see https://docs.anthropic.com/claude/docs/tool-use
* @see https://standardschema.dev/json-schema
*/
export interface Tool<
TInput extends SchemaInput = SchemaInput,
TOutput extends SchemaInput = SchemaInput,
TName extends string = string,
> {
/**
* Unique name of the tool (used by the model to call it).
*
* Should be descriptive and follow naming conventions (e.g., snake_case or camelCase).
* Must be unique within the tools array.
*
* @example "get_weather", "search_database", "sendEmail"
*/
name: TName
/**
* Clear description of what the tool does.
*
* This is crucial - the model uses this to decide when to call the tool.
* Be specific about what the tool does, what parameters it needs, and what it returns.
*
* @example "Get the current weather in a given location. Returns temperature, conditions, and forecast."
*/
description: string
/**
* Schema describing the tool's input parameters.
*
* Can be any Standard JSON Schema compliant schema (Zod, ArkType, Valibot, etc.) or a plain JSON Schema object.
* Defines the structure and types of arguments the tool accepts.
* The model will generate arguments matching this schema.
* Standard JSON Schema compliant schemas are converted to JSON Schema for LLM providers.
*
* @see https://standardschema.dev/json-schema
* @see https://json-schema.org/
*
* @example
* // Using Zod v4+ schema (natively supports Standard JSON Schema)
* import { z } from 'zod';
* z.object({
* location: z.string().describe("City name or coordinates"),
* unit: z.enum(["celsius", "fahrenheit"]).optional()
* })
*
* @example
* // Using ArkType (natively supports Standard JSON Schema)
* import { type } from 'arktype';
* type({
* location: 'string',
* unit: "'celsius' | 'fahrenheit'"
* })
*
* @example
* // Using plain JSON Schema
* {
* type: 'object',
* properties: {
* location: { type: 'string', description: 'City name or coordinates' },
* unit: { type: 'string', enum: ['celsius', 'fahrenheit'] }
* },
* required: ['location']
* }
*/
inputSchema?: TInput
/**
* Optional schema for validating tool output.
*
* Can be any Standard JSON Schema compliant schema or a plain JSON Schema object.
* If provided with a Standard Schema compliant schema, tool results will be validated
* against this schema before being sent back to the model. This catches bugs in tool
* implementations and ensures consistent output formatting.
*
* Note: This is client-side validation only - not sent to LLM providers.
* Note: Plain JSON Schema output validation is not performed at runtime.
*
* @example
* // Using Zod
* z.object({
* temperature: z.number(),
* conditions: z.string(),
* forecast: z.array(z.string()).optional()
* })
*/
outputSchema?: TOutput
/**
* Optional function to execute when the model calls this tool.
*
* If provided, the SDK will automatically execute the function with the model's arguments
* and feed the result back to the model. This enables autonomous tool use loops.
*
* Can return any value - will be automatically stringified if needed.
*
* @param args - The arguments parsed from the model's tool call (validated against inputSchema)
* @returns Result to send back to the model (validated against outputSchema if provided)
*
* @example
* execute: async (args) => {
* const weather = await fetchWeather(args.location);
* return weather; // Can return object or string
* }
*/
execute?: (args: any, context?: ToolExecutionContext) => Promise<any> | any
/** If true, tool execution requires user approval before running. Works with both server and client tools. */
needsApproval?: boolean
/** If true, this tool is lazy and will only be sent to the LLM after being discovered via the lazy tool discovery mechanism. Only meaningful when used with chat(). */
lazy?: boolean
/** Additional metadata for adapters or custom extensions */
metadata?: Record<string, any>
}
export interface ToolConfig {
[]: Tool
}
/**
* Structured output format specification.
*
* Constrains the model's output to match a specific JSON structure.
* Useful for extracting structured data, form filling, or ensuring consistent response formats.
*
* @see https://platform.openai.com/docs/guides/structured-outputs
* @see https://sdk.vercel.ai/docs/ai-sdk-core/structured-outputs
*
* @template TData - TypeScript type of the expected data structure (for type safety)
*/
export interface ResponseFormat<TData = any> {
/**
* Type of structured output.
*
* - "json_object": Forces the model to output valid JSON (any structure)
* - "json_schema": Validates output against a provided JSON Schema (strict structure)
*
* @see https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format
*/
type: 'json_object' | 'json_schema'
/**
* JSON schema specification (required when type is "json_schema").
*
* Defines the exact structure the model's output must conform to.
* OpenAI's structured outputs will guarantee the output matches this schema.
*/
json_schema?: {
/**
* Unique name for the schema.
*
* Used to identify the schema in logs and debugging.
* Should be descriptive (e.g., "user_profile", "search_results").
*/
name: string
/**
* Optional description of what the schema represents.
*
* Helps document the purpose of this structured output.
*
* @example "User profile information including name, email, and preferences"
*/
description?: string
/**
* JSON Schema definition for the expected output structure.
*
* Must be a valid JSON Schema (draft 2020-12 or compatible).
* The model's output will be validated against this schema.
*
* @see https://json-schema.org/
*
* @example
* {
* type: "object",
* properties: {
* name: { type: "string" },
* age: { type: "number" },
* email: { type: "string", format: "email" }
* },
* required: ["name", "email"],
* additionalProperties: false
* }
*/
schema: Record<string, any>
/**
* Whether to enforce strict schema validation.
*
* When true (recommended), the model guarantees output will match the schema exactly.
* When false, the model will "best effort" match the schema.
*
* Default: true (for providers that support it)
*
* @see https://platform.openai.com/docs/guides/structured-outputs#strict-mode
*/
strict?: boolean
}
/**
* Type-only property to carry the inferred data type.
*
* This is never set at runtime - it only exists for TypeScript type inference.
* Allows the SDK to know what type to expect when parsing the response.
*
* @internal
*/
__data?: TData
}
/**
* State passed to agent loop strategy for determining whether to continue
*/
export interface AgentLoopState {
/** Current iteration count (0-indexed) */
iterationCount: number
/** Current messages array */
messages: Array<ModelMessage>
/** Finish reason from the last response */
finishReason: string | null
}
/**
* Strategy function that determines whether the agent loop should continue
*
* @param state - Current state of the agent loop
* @returns true to continue looping, false to stop
*
* @example
* ```typescript
* // Continue for up to 5 iterations
* const strategy: AgentLoopStrategy = ({ iterationCount }) => iterationCount < 5;
* ```
*/
export type AgentLoopStrategy = (state: AgentLoopState) => boolean
/**
* Options passed into the SDK and further piped to the AI provider.
*/
export interface TextOptions<
TProviderOptionsSuperset extends Record<string, any> = Record<string, any>,
TProviderOptionsForModel = TProviderOptionsSuperset,
> {
model: string
messages: Array<ModelMessage>
tools?: Array<Tool<any, any, any>>
systemPrompts?: Array<string>
agentLoopStrategy?: AgentLoopStrategy
/**
* Controls the randomness of the output.
* Higher values (e.g., 0.8) make output more random, lower values (e.g., 0.2) make it more focused and deterministic.
* Range: [0.0, 2.0]
*
* Note: Generally recommended to use either temperature or topP, but not both.
*
* Provider usage:
* - OpenAI: `temperature` (number) - in text.top_p field
* - Anthropic: `temperature` (number) - ranges from 0.0 to 1.0, default 1.0
* - Gemini: `generationConfig.temperature` (number) - ranges from 0.0 to 2.0
*/
temperature?: number
/**
* Nucleus sampling parameter. An alternative to temperature sampling.
* The model considers the results of tokens with topP probability mass.
* For example, 0.1 means only tokens comprising the top 10% probability mass are considered.
*
* Note: Generally recommended to use either temperature or topP, but not both.
*
* Provider usage:
* - OpenAI: `text.top_p` (number)
* - Anthropic: `top_p` (number | null)
* - Gemini: `generationConfig.topP` (number)
*/
topP?: number
/**
* The maximum number of tokens to generate in the response.
*
* Provider usage:
* - OpenAI: `max_output_tokens` (number) - includes visible output and reasoning tokens
* - Anthropic: `max_tokens` (number, required) - range x >= 1
* - Gemini: `generationConfig.maxOutputTokens` (number)
*/
maxTokens?: number
/**
* Additional metadata to attach to the request.
* Can be used for tracking, debugging, or passing custom information.
* Structure and constraints vary by provider.
*
* Provider usage:
* - OpenAI: `metadata` (Record<string, string>) - max 16 key-value pairs, keys max 64 chars, values max 512 chars
* - Anthropic: `metadata` (Record<string, any>) - includes optional user_id (max 256 chars)
* - Gemini: Not directly available in TextProviderOptions
*/
metadata?: Record<string, any>
modelOptions?: TProviderOptionsForModel
request?: Request | RequestInit
/**
* Schema for structured output.
* When provided, the adapter should use the provider's native structured output API
* to ensure the response conforms to this schema.
* The schema will be converted to JSON Schema format before being sent to the provider.
* Supports any Standard JSON Schema compliant library (Zod, ArkType, Valibot, etc.).
*/
outputSchema?: SchemaInput
/**
* Conversation ID for correlating client and server-side devtools events.
* When provided, server-side events will be linked to the client conversation in devtools.
*/
conversationId?: string
/**
* AbortController for request cancellation.
*
* Allows you to cancel an in-progress request using an AbortController.
* Useful for implementing timeouts or user-initiated cancellations.
*
* @example
* const abortController = new AbortController();
* setTimeout(() => abortController.abort(), 5000); // Cancel after 5 seconds
* await chat({ ..., abortController });
*
* @see https://developer.mozilla.org/en-US/docs/Web/API/AbortController
*/
abortController?: AbortController
}
// ============================================================================
// AG-UI Protocol Event Types
// ============================================================================
/**
* AG-UI Protocol event types.
* Based on the AG-UI specification for agent-user interaction.
* @see https://docs.ag-ui.com/concepts/events
*/
export type AGUIEventType =
| 'RUN_STARTED'
| 'RUN_FINISHED'
| 'RUN_ERROR'
| 'TEXT_MESSAGE_START'
| 'TEXT_MESSAGE_CONTENT'
| 'TEXT_MESSAGE_END'
| 'TOOL_CALL_START'
| 'TOOL_CALL_ARGS'
| 'TOOL_CALL_END'
| 'STEP_STARTED'
| 'STEP_FINISHED'
| 'MESSAGES_SNAPSHOT'
| 'STATE_SNAPSHOT'
| 'STATE_DELTA'
| 'CUSTOM'
/**
* Stream chunk/event types (AG-UI protocol).
*/
export type StreamChunkType = AGUIEventType
/**
* Base structure for AG-UI events.
* Extends AG-UI spec with TanStack AI additions (model field).
*/
export interface BaseAGUIEvent {
type: AGUIEventType
timestamp: number
/** Model identifier for multi-model support */
model?: string
/** Original provider event for debugging/advanced use cases */
rawEvent?: unknown
}
// ============================================================================
// AG-UI Event Interfaces
// ============================================================================
/**
* Emitted when a run starts.
* This is the first event in any streaming response.
*/
export interface RunStartedEvent extends BaseAGUIEvent {
type: 'RUN_STARTED'
/** Unique identifier for this run */
runId: string
/** Optional thread/conversation ID */
threadId?: string
}
/**
* Emitted when a run completes successfully.
*/
export interface RunFinishedEvent extends BaseAGUIEvent {
type: 'RUN_FINISHED'
/** Run identifier */
runId: string
/** Why the generation stopped */
finishReason: 'stop' | 'length' | 'content_filter' | 'tool_calls' | null
/** Token usage statistics */
usage?: {
promptTokens: number
completionTokens: number
totalTokens: number
}
}
/**
* Emitted when an error occurs during a run.
*/
export interface RunErrorEvent extends BaseAGUIEvent {
type: 'RUN_ERROR'
/** Run identifier (if available) */
runId?: string
/** Error details */
error: {
message: string
code?: string
}
}
/**
* Emitted when a text message starts.
*/
export interface TextMessageStartEvent extends BaseAGUIEvent {
type: 'TEXT_MESSAGE_START'
/** Unique identifier for this message */
messageId: string
/** Role of the message sender */
role: 'user' | 'assistant' | 'system' | 'tool'
}
/**
* Emitted when text content is generated (streaming tokens).
*/
export interface TextMessageContentEvent extends BaseAGUIEvent {
type: 'TEXT_MESSAGE_CONTENT'
/** Message identifier */
messageId: string
/** The incremental content token */
delta: string
/** Full accumulated content so far (optional, for debugging) */
content?: string
}
/**
* Emitted when a text message completes.
*/
export interface TextMessageEndEvent extends BaseAGUIEvent {
type: 'TEXT_MESSAGE_END'
/** Message identifier */
messageId: string
}
/**
* Emitted when a tool call starts.
*/
export interface ToolCallStartEvent extends BaseAGUIEvent {
type: 'TOOL_CALL_START'
/** Unique identifier for this tool call */
toolCallId: string
/** Name of the tool being called */
toolName: string
/** ID of the parent message that initiated this tool call */
parentMessageId?: string
/** Index for parallel tool calls */
index?: number
/** Provider-specific metadata to carry into the ToolCall */
providerMetadata?: Record<string, unknown>
}
/**
* Emitted when tool call arguments are streaming.
*/
export interface ToolCallArgsEvent extends BaseAGUIEvent {
type: 'TOOL_CALL_ARGS'
/** Tool call identifier */
toolCallId: string
/** Incremental JSON arguments delta */
delta: string
/** Full accumulated arguments so far */
args?: string
}
/**
* Emitted when a tool call completes.
*/
export interface ToolCallEndEvent extends BaseAGUIEvent {
type: 'TOOL_CALL_END'
/** Tool call identifier */
toolCallId: string
/** Name of the tool */
toolName: string
/** Final parsed input arguments */
input?: unknown
/** Tool execution result (if executed) */
result?: string
}
/**
* Emitted when a thinking/reasoning step starts.
*/
export interface StepStartedEvent extends BaseAGUIEvent {
type: 'STEP_STARTED'
/** Unique identifier for this step */
stepId: string
/** Type of step (e.g., 'thinking', 'planning') */
stepType?: string
}
/**
* Emitted when a thinking/reasoning step finishes.
*/
export interface StepFinishedEvent extends BaseAGUIEvent {
type: 'STEP_FINISHED'
/** Step identifier */
stepId: string
/** Incremental thinking content */
delta: string
/** Full accumulated thinking content (optional, for debugging) */
content?: string
}
/**
* Emitted to provide a snapshot of all messages in a conversation.
*
* Unlike StateSnapshot (which carries arbitrary application state),
* MessagesSnapshot specifically delivers the conversation transcript.
* This is a first-class AG-UI event type.
*/
export interface MessagesSnapshotEvent extends BaseAGUIEvent {
type: 'MESSAGES_SNAPSHOT'
/** Complete array of messages in the conversation */
messages: Array<UIMessage>
}
/**
* Emitted to provide a full state snapshot.
*/
export interface StateSnapshotEvent extends BaseAGUIEvent {
type: 'STATE_SNAPSHOT'
/** The complete state object */
state: Record<string, unknown>
}
/**
* Emitted to provide an incremental state update.
*/
export interface StateDeltaEvent extends BaseAGUIEvent {
type: 'STATE_DELTA'
/** The state changes to apply */
delta: Record<string, unknown>
}
/**
* Custom event for extensibility.
*/
export interface CustomEvent extends BaseAGUIEvent {
type: 'CUSTOM'
/** Custom event name */
name: string
/** Custom event value */
value?: unknown
}
/**
* Union of all AG-UI events.
*/
export type AGUIEvent =
| RunStartedEvent
| RunFinishedEvent
| RunErrorEvent
| TextMessageStartEvent
| TextMessageContentEvent
| TextMessageEndEvent
| ToolCallStartEvent
| ToolCallArgsEvent
| ToolCallEndEvent
| StepStartedEvent
| StepFinishedEvent
| MessagesSnapshotEvent
| StateSnapshotEvent
| StateDeltaEvent
| CustomEvent
/**
* Chunk returned by the SDK during streaming chat completions.
* Uses the AG-UI protocol event format.
*/
export type StreamChunk = AGUIEvent
// Simple streaming format for basic text completions
// Converted to StreamChunk format by convertTextCompletionStream()
export interface TextCompletionChunk {
id: string
model: string
content: string
role?: 'assistant'
finishReason?: 'stop' | 'length' | 'content_filter' | null
usage?: {
promptTokens: number
completionTokens: number
totalTokens: number
}
}
export interface SummarizationOptions {
model: string
text: string
maxLength?: number
style?: 'bullet-points' | 'paragraph' | 'concise'
focus?: Array<string>
}
export interface SummarizationResult {
id: string
model: string
summary: string
usage: {
promptTokens: number
completionTokens: number
totalTokens: number
}
}
// ============================================================================
// Image Generation Types
// ============================================================================
/**
* Options for image generation.
* These are the common options supported across providers.
*/
export interface ImageGenerationOptions<
TProviderOptions extends object = object,
TSize extends string = string,
> {
/** The model to use for image generation */
model: string
/** Text description of the desired image(s) */
prompt: string
/** Number of images to generate (default: 1) */
numberOfImages?: number
/** Image size in WIDTHxHEIGHT format (e.g., "1024x1024") */
size?: TSize
/** Model-specific options for image generation */
modelOptions?: TProviderOptions
}
/**
* A single generated image
*/
export interface GeneratedImage {
/** Base64-encoded image data */
b64Json?: string
/** URL to the generated image (may be temporary) */
url?: string
/** Revised prompt used by the model (if applicable) */
revisedPrompt?: string
}
/**
* Result of image generation
*/
export interface ImageGenerationResult {
/** Unique identifier for the generation */
id: string
/** Model used for generation */
model: string
/** Array of generated images */
images: Array<GeneratedImage>
/** Token usage information (if available) */
usage?: {
inputTokens?: number
outputTokens?: number
totalTokens?: number
}
}
// ============================================================================
// Video Generation Types (Experimental)
// ============================================================================
/**
* Options for video generation.
* These are the common options supported across providers.
*
* @experimental Video generation is an experimental feature and may change.
*/
export interface VideoGenerationOptions<
TProviderOptions extends object = object,
TSize extends string = string,
> {
/** The model to use for video generation */
model: string
/** Text description of the desired video */
prompt: string
/** Video size — format depends on the provider (e.g., "16:9", "1280x720") */
size?: TSize
/** Video duration in seconds */
duration?: number
/** Model-specific options for video generation */
modelOptions?: TProviderOptions
}
/**
* Result of creating a video generation job.
*
* @experimental Video generation is an experimental feature and may change.
*/
export interface VideoJobResult {
/** Unique job identifier for polling status */
jobId: string
/** Model used for generation */
model: string
}
/**
* Status of a video generation job.
*
* @experimental Video generation is an experimental feature and may change.
*/
export interface VideoStatusResult {
/** Job identifier */
jobId: string
/** Current status of the job */
status: 'pending' | 'processing' | 'completed' | 'failed'
/** Progress percentage (0-100), if available */
progress?: number
/** Error message if status is 'failed' */
error?: string
}
/**
* Result containing the URL to a generated video.
*
* @experimental Video generation is an experimental feature and may change.
*/
export interface VideoUrlResult {
/** Job identifier */
jobId: string
/** URL to the generated video */
url: string
/** When the URL expires, if applicable */
expiresAt?: Date
}
// ============================================================================
// Text-to-Speech (TTS) Types
// ============================================================================
/**
* Options for text-to-speech generation.
* These are the common options supported across providers.
*/
export interface TTSOptions<TProviderOptions extends object = object> {
/** The model to use for TTS generation */
model: string
/** The text to convert to speech */
text: string
/** The voice to use for generation */
voice?: string
/** The output audio format */
format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm'
/** The speed of the generated audio (0.25 to 4.0) */
speed?: number
/** Model-specific options for TTS generation */
modelOptions?: TProviderOptions
}
/**
* Result of text-to-speech generation.
*/
export interface TTSResult {
/** Unique identifier for the generation */
id: string
/** Model used for generation */
model: string
/** Base64-encoded audio data */
audio: string
/** Audio format of the generated audio */
format: string
/** Duration of the audio in seconds, if available */
duration?: number
/** Content type of the audio (e.g., 'audio/mp3') */
contentType?: string
}
// ============================================================================
// Transcription (Speech-to-Text) Types
// ============================================================================
/**
* Options for audio transcription.
* These are the common options supported across providers.
*/
export interface TranscriptionOptions<
TProviderOptions extends object = object,
> {
/** The model to use for transcription */
model: string
/** The audio data to transcribe - can be base64 string, File, Blob, or Buffer */
audio: string | File | Blob | ArrayBuffer
/** The language of the audio in ISO-639-1 format (e.g., 'en') */
language?: string
/** An optional prompt to guide the transcription */
prompt?: string
/** The format of the transcription output */
responseFormat?: 'json' | 'text' | 'srt' | 'verbose_json' | 'vtt'
/** Model-specific options for transcription */
modelOptions?: TProviderOptions
}
/**
* A single segment of transcribed audio with timing information.
*/
export interface TranscriptionSegment {
/** Unique identifier for the segment */
id: number
/** Start time of the segment in seconds */
start: number
/** End time of the segment in seconds */
end: number
/** Transcribed text for this segment */
text: string
/** Confidence score (0-1), if available */
confidence?: number
/** Speaker identifier, if diarization is enabled */
speaker?: string
}
/**
* A single word with timing information.
*/
export interface TranscriptionWord {
/** The transcribed word */
word: string
/** Start time in seconds */
start: number
/** End time in seconds */
end: number
}
/**
* Result of audio transcription.
*/
export interface TranscriptionResult {
/** Unique identifier for the transcription */
id: string
/** Model used for transcription */
model: string
/** The full transcribed text */
text: string
/** Language detected or specified */
language?: string
/** Duration of the audio in seconds */
duration?: number
/** Detailed segments with timing, if available */
segments?: Array<TranscriptionSegment>
/** Word-level timestamps, if available */
words?: Array<TranscriptionWord>
}
/**
* Default metadata type for adapters that don't define custom metadata.
* Uses unknown for all modalities.
*/
export interface DefaultMessageMetadataByModality {
text: unknown
image: unknown
audio: unknown
video: unknown
document: unknown
}