@tanstack/ai
Version:
Core TanStack AI library - Open source AI SDK
1,051 lines (1,050 loc) • 36.7 kB
TypeScript
import { StandardJSONSchemaV1 } from '@standard-schema/spec';
/**
 * Tool call states - track the lifecycle of a tool call.
 *
 * The names describe a progression from argument streaming through
 * (optional) user approval; see ToolCallPart.state for where this is used.
 */
export type ToolCallState = 'awaiting-input' | 'input-streaming' | 'input-complete' | 'approval-requested' | 'approval-responded';
/**
 * Tool result states - track the lifecycle of a tool result.
 * Used by ToolResultPart.state: 'streaming' while content arrives,
 * 'complete' on success, 'error' on failure (see ToolResultPart.error).
 */
export type ToolResultState = 'streaming' | 'complete' | 'error';
/**
 * JSON Schema type for defining tool input/output schemas as raw JSON Schema objects.
 * This allows tools to be defined without schema libraries when you have JSON Schema definitions available.
 *
 * Covers the commonly-used keywords from JSON Schema draft 2020-12 and earlier
 * drafts (`definitions` alongside `$defs`, `additionalItems`). The index
 * signature at the bottom keeps the type open for vendor extensions
 * (e.g. `x-*` keys) and keywords not listed here.
 */
export interface JSONSchema {
  // Core type and structure
  type?: string | Array<string>;
  properties?: Record<string, JSONSchema>;
  items?: JSONSchema | Array<JSONSchema>;
  required?: Array<string>;
  enum?: Array<unknown>;
  const?: unknown;
  description?: string;
  default?: unknown;
  // References and reusable definitions ($defs is the 2020-12 spelling,
  // definitions the legacy one; both are kept for compatibility)
  $ref?: string;
  $defs?: Record<string, JSONSchema>;
  definitions?: Record<string, JSONSchema>;
  // Composition and conditionals
  allOf?: Array<JSONSchema>;
  anyOf?: Array<JSONSchema>;
  oneOf?: Array<JSONSchema>;
  not?: JSONSchema;
  if?: JSONSchema;
  then?: JSONSchema;
  else?: JSONSchema;
  // Numeric constraints
  minimum?: number;
  maximum?: number;
  exclusiveMinimum?: number;
  exclusiveMaximum?: number;
  // String constraints
  minLength?: number;
  maxLength?: number;
  pattern?: string;
  format?: string;
  // Array constraints
  minItems?: number;
  maxItems?: number;
  uniqueItems?: boolean;
  // Object constraints
  additionalProperties?: boolean | JSONSchema;
  additionalItems?: boolean | JSONSchema;
  patternProperties?: Record<string, JSONSchema>;
  propertyNames?: JSONSchema;
  minProperties?: number;
  maxProperties?: number;
  // Annotations
  title?: string;
  examples?: Array<unknown>;
  // Open index signature: allow any additional JSON Schema keyword or
  // vendor extension without a compile error. (The original `[]: any;`
  // was invalid TypeScript syntax for an index signature.)
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- deliberately open-ended
  [key: string]: any;
}
/**
 * Union type for schema input - can be any Standard JSON Schema compliant schema or a plain JSONSchema object.
 *
 * Standard JSON Schema compliant libraries include:
 * - Zod v4.2+ (natively supports StandardJSONSchemaV1)
 * - ArkType v2.1.28+ (natively supports StandardJSONSchemaV1)
 * - Valibot v1.2+ (via `toStandardJsonSchema()` from `@valibot/to-json-schema`)
 *
 * Note: plain JSONSchema objects carry no compile-time type information;
 * see InferSchemaType for how inference behaves for each branch.
 *
 * @see https://standardschema.dev/json-schema
 */
export type SchemaInput = StandardJSONSchemaV1<any, any> | JSONSchema;
/**
 * Infer the TypeScript type from a schema.
 * For Standard JSON Schema compliant schemas, extracts the input type.
 * For plain JSONSchema, resolves to `unknown` since we can't infer types
 * from JSON Schema at compile time (callers must narrow before use).
 */
export type InferSchemaType<T> = T extends StandardJSONSchemaV1<infer TInput, unknown> ? TInput : unknown;
/**
 * A tool/function call requested by the model, in the
 * OpenAI-style `{ type: 'function', function: {...} }` shape.
 */
export interface ToolCall {
  /** Unique identifier for this call (used to correlate the tool result) */
  id: string;
  /** Always 'function' — the only call type currently modeled */
  type: 'function';
  function: {
    /** Name of the tool being invoked */
    name: string;
    /** Arguments as a JSON-encoded string, as produced by the model */
    arguments: string;
  };
  /** Provider-specific metadata to carry through the tool call lifecycle */
  providerMetadata?: Record<string, unknown>;
}
/**
 * Supported input modality types for multimodal content.
 * - 'text': Plain text content
 * - 'image': Image content (base64 or URL)
 * - 'audio': Audio content (base64 or URL)
 * - 'video': Video content (base64 or URL)
 * - 'document': Document content like PDFs (base64 or URL)
 *
 * Also used to constrain message content per model; see InputModalitiesTypes.
 */
export type Modality = 'text' | 'image' | 'audio' | 'video' | 'document';
/**
 * Source specification for inline data content (base64).
 * Requires a mimeType to ensure providers receive proper content type information.
 * One branch of the ContentPartSource discriminated union (tag: `type`).
 */
export interface ContentPartDataSource {
  /**
   * Indicates this is inline data content.
   */
  type: 'data';
  /**
   * The base64-encoded content value.
   */
  value: string;
  /**
   * The MIME type of the content (e.g., 'image/png', 'audio/wav').
   * Required for data sources to ensure proper handling by providers.
   */
  mimeType: string;
}
/**
 * Source specification for URL-based content.
 * mimeType is optional as it can often be inferred from the URL or response headers.
 * One branch of the ContentPartSource discriminated union (tag: `type`).
 */
export interface ContentPartUrlSource {
  /**
   * Indicates this is URL-referenced content.
   */
  type: 'url';
  /**
   * HTTP(S) URL or data URI pointing to the content.
   */
  value: string;
  /**
   * Optional MIME type hint for cases where providers can't infer it from the URL.
   */
  mimeType?: string;
}
/**
 * Source specification for multimodal content.
 * Discriminated union supporting both inline data (base64) and URL-based content.
 * Narrow on the `type` tag ('data' | 'url'):
 * - For 'data' sources: mimeType is required
 * - For 'url' sources: mimeType is optional
 */
export type ContentPartSource = ContentPartDataSource | ContentPartUrlSource;
/**
 * Image content part for multimodal messages.
 * Branch of the ContentPart / MessagePart unions (tag: type === 'image').
 * @template TMetadata - Provider-specific metadata type (e.g., OpenAI's detail level)
 */
export interface ImagePart<TMetadata = unknown> {
  type: 'image';
  /** Source of the image content */
  source: ContentPartSource;
  /** Provider-specific metadata (e.g., OpenAI's detail: 'auto' | 'low' | 'high') */
  metadata?: TMetadata;
}
/**
 * Audio content part for multimodal messages.
 * Branch of the ContentPart / MessagePart unions (tag: type === 'audio').
 * @template TMetadata - Provider-specific metadata type
 */
export interface AudioPart<TMetadata = unknown> {
  type: 'audio';
  /** Source of the audio content */
  source: ContentPartSource;
  /** Provider-specific metadata (e.g., format, sample rate) */
  metadata?: TMetadata;
}
/**
 * Video content part for multimodal messages.
 * Branch of the ContentPart / MessagePart unions (tag: type === 'video').
 * @template TMetadata - Provider-specific metadata type
 */
export interface VideoPart<TMetadata = unknown> {
  type: 'video';
  /** Source of the video content */
  source: ContentPartSource;
  /** Provider-specific metadata (e.g., duration, resolution) */
  metadata?: TMetadata;
}
/**
 * Document content part for multimodal messages (e.g., PDFs).
 * Branch of the ContentPart / MessagePart unions (tag: type === 'document').
 * @template TMetadata - Provider-specific metadata type (e.g., Anthropic's media_type)
 */
export interface DocumentPart<TMetadata = unknown> {
  type: 'document';
  /** Source of the document content */
  source: ContentPartSource;
  /** Provider-specific metadata (e.g., media_type for PDFs) */
  metadata?: TMetadata;
}
/**
 * Union type for all multimodal content parts.
 * @template TTextMeta - Provider-specific text metadata type
 * @template TImageMeta - Provider-specific image metadata type
 * @template TAudioMeta - Provider-specific audio metadata type
 * @template TVideoMeta - Provider-specific video metadata type
 * @template TDocumentMeta - Provider-specific document metadata type
 */
export type ContentPart<TTextMeta = unknown, TImageMeta = unknown, TAudioMeta = unknown, TVideoMeta = unknown, TDocumentMeta = unknown> = TextPart<TTextMeta> | ImagePart<TImageMeta> | AudioPart<TAudioMeta> | VideoPart<TVideoMeta> | DocumentPart<TDocumentMeta>;
/**
 * Helper type to filter ContentPart union to only include specific modalities.
 * Used to constrain message content based on model capabilities.
 *
 * Works in two steps: first each modality's metadata type is plugged into
 * ContentPart, then Extract keeps only the union branches whose `type` tag
 * appears in the adapter's `inputModalities` array.
 */
export type ContentPartForInputModalitiesTypes<TInputModalitiesTypes extends InputModalitiesTypes> = Extract<ContentPart<TInputModalitiesTypes['messageMetadataByModality']['text'], TInputModalitiesTypes['messageMetadataByModality']['image'], TInputModalitiesTypes['messageMetadataByModality']['audio'], TInputModalitiesTypes['messageMetadataByModality']['video'], TInputModalitiesTypes['messageMetadataByModality']['document']>, {
  type: TInputModalitiesTypes['inputModalities'][number];
}>;
/**
 * Helper type to convert a readonly array of modalities to a union type
 * (indexed access with `number` yields the union of element types).
 * e.g., readonly ['text', 'image'] -> 'text' | 'image'
 */
export type ModalitiesArrayToUnion<T extends ReadonlyArray<Modality>> = T[number];
/**
 * Type for message content constrained by supported modalities.
 * When modalities is ['text', 'image'], only TextPart and ImagePart are allowed in the array.
 * The `string | null` branches mirror ModelMessage.content's plain-text and
 * empty-content forms.
 */
export type ConstrainedContent<TInputModalitiesTypes extends InputModalitiesTypes> = string | null | Array<ContentPartForInputModalitiesTypes<TInputModalitiesTypes>>;
/**
 * Provider-facing chat message (the shape sent to/received from adapters).
 * @template TContent - Content type: plain string, null, or multimodal parts
 */
export interface ModelMessage<TContent extends string | null | Array<ContentPart> = string | null | Array<ContentPart>> {
  /** Who produced this message */
  role: 'user' | 'assistant' | 'tool';
  /** Plain text, null, or an array of multimodal content parts */
  content: TContent;
  /** Optional participant name */
  name?: string;
  /** Tool calls requested by the model (assumed assistant-role; TODO confirm) */
  toolCalls?: Array<ToolCall>;
  /** ID of the tool call this message responds to (presumably for role 'tool') */
  toolCallId?: string;
}
/**
 * Message parts - building blocks of UIMessage.
 *
 * Plain text part (tag: type === 'text').
 * @template TMetadata - Provider-specific metadata type
 */
export interface TextPart<TMetadata = unknown> {
  type: 'text';
  /** The text content */
  content: string;
  /** Provider-specific metadata */
  metadata?: TMetadata;
}
/**
 * UI-facing representation of a tool call (tag: type === 'tool-call').
 * Tracks streaming arguments and the optional approval flow.
 */
export interface ToolCallPart {
  type: 'tool-call';
  /** Tool call identifier (correlates with ToolResultPart.toolCallId) */
  id: string;
  /** Name of the tool being called */
  name: string;
  /** Arguments as a JSON string (may be partial while state is streaming) */
  arguments: string;
  /** Lifecycle state of this call */
  state: ToolCallState;
  /** Approval metadata if tool requires user approval */
  approval?: {
    id: string;
    needsApproval: boolean;
    approved?: boolean;
  };
  /** Tool execution output (for client tools or after approval) */
  output?: any;
}
/**
 * UI-facing representation of a tool result (tag: type === 'tool-result').
 */
export interface ToolResultPart {
  type: 'tool-result';
  /** ID of the tool call this result belongs to */
  toolCallId: string;
  /** Result content (stringified) */
  content: string;
  /** Lifecycle state: 'streaming' | 'complete' | 'error' */
  state: ToolResultState;
  /** Error message when state is 'error' */
  error?: string;
}
/**
 * Reasoning/"thinking" content emitted by the model
 * (tag: type === 'thinking'; see StepStarted/StepFinished events).
 */
export interface ThinkingPart {
  type: 'thinking';
  /** Accumulated thinking text */
  content: string;
}
export type MessagePart = TextPart | ImagePart | AudioPart | VideoPart | DocumentPart | ToolCallPart | ToolResultPart | ThinkingPart;
/**
 * UIMessage - Domain-specific message format optimized for building chat UIs
 * Contains parts that can be text, tool calls, or tool results
 */
export interface UIMessage {
  /** Unique message identifier */
  id: string;
  /** Who produced this message (note: 'system' here, 'tool' in ModelMessage) */
  role: 'system' | 'user' | 'assistant';
  /** Ordered content parts making up the message */
  parts: Array<MessagePart>;
  /** Creation timestamp, if known */
  createdAt?: Date;
}
/**
 * Describes what an adapter/model accepts: the supported input
 * modalities plus the per-modality message metadata types.
 */
export type InputModalitiesTypes = {
  /** Modalities the model accepts as input */
  inputModalities: ReadonlyArray<Modality>;
  /** Per-modality provider metadata types */
  messageMetadataByModality: DefaultMessageMetadataByModality;
};
/**
 * A ModelMessage with content constrained to only allow content parts
 * matching the specified input modalities.
 * Built by replacing the `content` field via Omit + intersection.
 */
export type ConstrainedModelMessage<TInputModalitiesTypes extends InputModalitiesTypes> = Omit<ModelMessage, 'content'> & {
  content: ConstrainedContent<TInputModalitiesTypes>;
};
/**
 * Context passed to tool execute functions, providing capabilities like
 * emitting custom events during execution.
 */
export interface ToolExecutionContext {
  /** The ID of the tool call being executed */
  toolCallId?: string;
  /**
   * Emit a custom event during tool execution.
   * Events are streamed to the client in real-time as AG-UI CUSTOM events.
   * (See CustomEvent for the shape delivered to the client.)
   *
   * @param eventName - Name of the custom event
   * @param value - Event payload value
   *
   * @example
   * ```ts
   * const tool = toolDefinition({ ... }).server(async (args, context) => {
   *   context?.emitCustomEvent('progress', { step: 1, total: 3 })
   *   // ... do work ...
   *   context?.emitCustomEvent('progress', { step: 2, total: 3 })
   *   // ... do more work ...
   *   return result
   * })
   * ```
   */
  emitCustomEvent: (eventName: string, value: Record<string, any>) => void;
}
/**
 * Tool/Function definition for function calling.
 *
 * Tools allow the model to interact with external systems, APIs, or perform computations.
 * The model will decide when to call tools based on the user's request and the tool descriptions.
 *
 * Tools can use any Standard JSON Schema compliant library (Zod, ArkType, Valibot, etc.)
 * or plain JSON Schema objects for runtime validation and type safety.
 *
 * @template TInput - Schema type for the tool's input parameters
 * @template TOutput - Schema type for the tool's output
 * @template TName - Literal string type of the tool name (enables name-keyed typing)
 *
 * @see https://platform.openai.com/docs/guides/function-calling
 * @see https://docs.anthropic.com/claude/docs/tool-use
 * @see https://standardschema.dev/json-schema
 */
export interface Tool<TInput extends SchemaInput = SchemaInput, TOutput extends SchemaInput = SchemaInput, TName extends string = string> {
  /**
   * Unique name of the tool (used by the model to call it).
   *
   * Should be descriptive and follow naming conventions (e.g., snake_case or camelCase).
   * Must be unique within the tools array.
   *
   * @example "get_weather", "search_database", "sendEmail"
   */
  name: TName;
  /**
   * Clear description of what the tool does.
   *
   * This is crucial - the model uses this to decide when to call the tool.
   * Be specific about what the tool does, what parameters it needs, and what it returns.
   *
   * @example "Get the current weather in a given location. Returns temperature, conditions, and forecast."
   */
  description: string;
  /**
   * Schema describing the tool's input parameters.
   *
   * Can be any Standard JSON Schema compliant schema (Zod, ArkType, Valibot, etc.) or a plain JSON Schema object.
   * Defines the structure and types of arguments the tool accepts.
   * The model will generate arguments matching this schema.
   * Standard JSON Schema compliant schemas are converted to JSON Schema for LLM providers.
   *
   * @see https://standardschema.dev/json-schema
   * @see https://json-schema.org/
   *
   * @example
   * // Using Zod v4+ schema (natively supports Standard JSON Schema)
   * import { z } from 'zod';
   * z.object({
   *   location: z.string().describe("City name or coordinates"),
   *   unit: z.enum(["celsius", "fahrenheit"]).optional()
   * })
   *
   * @example
   * // Using ArkType (natively supports Standard JSON Schema)
   * import { type } from 'arktype';
   * type({
   *   location: 'string',
   *   unit: "'celsius' | 'fahrenheit'"
   * })
   *
   * @example
   * // Using plain JSON Schema
   * {
   *   type: 'object',
   *   properties: {
   *     location: { type: 'string', description: 'City name or coordinates' },
   *     unit: { type: 'string', enum: ['celsius', 'fahrenheit'] }
   *   },
   *   required: ['location']
   * }
   */
  inputSchema?: TInput;
  /**
   * Optional schema for validating tool output.
   *
   * Can be any Standard JSON Schema compliant schema or a plain JSON Schema object.
   * If provided with a Standard Schema compliant schema, tool results will be validated
   * against this schema before being sent back to the model. This catches bugs in tool
   * implementations and ensures consistent output formatting.
   *
   * Note: This is client-side validation only - not sent to LLM providers.
   * Note: Plain JSON Schema output validation is not performed at runtime.
   *
   * @example
   * // Using Zod
   * z.object({
   *   temperature: z.number(),
   *   conditions: z.string(),
   *   forecast: z.array(z.string()).optional()
   * })
   */
  outputSchema?: TOutput;
  /**
   * Optional function to execute when the model calls this tool.
   *
   * If provided, the SDK will automatically execute the function with the model's arguments
   * and feed the result back to the model. This enables autonomous tool use loops.
   *
   * Can return any value - will be automatically stringified if needed.
   *
   * @param args - The arguments parsed from the model's tool call (validated against inputSchema)
   * @param context - Optional execution context (tool call ID, custom event emitter)
   * @returns Result to send back to the model (validated against outputSchema if provided)
   *
   * @example
   * execute: async (args) => {
   *   const weather = await fetchWeather(args.location);
   *   return weather; // Can return object or string
   * }
   */
  execute?: (args: any, context?: ToolExecutionContext) => Promise<any> | any;
  /** If true, tool execution requires user approval before running. Works with both server and client tools. */
  needsApproval?: boolean;
  /** If true, this tool is lazy and will only be sent to the LLM after being discovered via the lazy tool discovery mechanism. Only meaningful when used with chat(). */
  lazy?: boolean;
  /** Additional metadata for adapters or custom extensions */
  metadata?: Record<string, any>;
}
/**
 * Registry of tools keyed by tool name.
 *
 * The original `[]: Tool;` was invalid TypeScript; the intended shape is a
 * string index signature mapping each tool's unique name to its definition.
 */
export interface ToolConfig {
  /** Lookup of tool definitions by their unique name */
  [toolName: string]: Tool;
}
/**
 * Structured output format specification.
 *
 * Constrains the model's output to match a specific JSON structure.
 * Useful for extracting structured data, form filling, or ensuring consistent response formats.
 *
 * The `__data` property exists only for compile-time inference and is never
 * populated at runtime.
 *
 * @see https://platform.openai.com/docs/guides/structured-outputs
 * @see https://sdk.vercel.ai/docs/ai-sdk-core/structured-outputs
 *
 * @template TData - TypeScript type of the expected data structure (for type safety)
 */
export interface ResponseFormat<TData = any> {
  /**
   * Type of structured output.
   *
   * - "json_object": Forces the model to output valid JSON (any structure)
   * - "json_schema": Validates output against a provided JSON Schema (strict structure)
   *
   * @see https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format
   */
  type: 'json_object' | 'json_schema';
  /**
   * JSON schema specification (required when type is "json_schema").
   *
   * Defines the exact structure the model's output must conform to.
   * OpenAI's structured outputs will guarantee the output matches this schema.
   */
  json_schema?: {
    /**
     * Unique name for the schema.
     *
     * Used to identify the schema in logs and debugging.
     * Should be descriptive (e.g., "user_profile", "search_results").
     */
    name: string;
    /**
     * Optional description of what the schema represents.
     *
     * Helps document the purpose of this structured output.
     *
     * @example "User profile information including name, email, and preferences"
     */
    description?: string;
    /**
     * JSON Schema definition for the expected output structure.
     *
     * Must be a valid JSON Schema (draft 2020-12 or compatible).
     * The model's output will be validated against this schema.
     *
     * @see https://json-schema.org/
     *
     * @example
     * {
     *   type: "object",
     *   properties: {
     *     name: { type: "string" },
     *     age: { type: "number" },
     *     email: { type: "string", format: "email" }
     *   },
     *   required: ["name", "email"],
     *   additionalProperties: false
     * }
     */
    schema: Record<string, any>;
    /**
     * Whether to enforce strict schema validation.
     *
     * When true (recommended), the model guarantees output will match the schema exactly.
     * When false, the model will "best effort" match the schema.
     *
     * Default: true (for providers that support it)
     *
     * @see https://platform.openai.com/docs/guides/structured-outputs#strict-mode
     */
    strict?: boolean;
  };
  /**
   * Type-only property to carry the inferred data type.
   *
   * This is never set at runtime - it only exists for TypeScript type inference.
   * Allows the SDK to know what type to expect when parsing the response.
   *
   * @internal
   */
  __data?: TData;
}
/**
 * State passed to agent loop strategy for determining whether to continue.
 * See AgentLoopStrategy for how this is consumed.
 */
export interface AgentLoopState {
  /** Current iteration count (0-indexed) */
  iterationCount: number;
  /** Current messages array */
  messages: Array<ModelMessage>;
  /** Finish reason from the last response (null before any response) */
  finishReason: string | null;
}
/**
 * Strategy function that determines whether the agent loop should continue.
 * Called between iterations of the tool-use loop (see TextOptions.agentLoopStrategy).
 *
 * @param state - Current state of the agent loop
 * @returns true to continue looping, false to stop
 *
 * @example
 * ```typescript
 * // Continue for up to 5 iterations
 * const strategy: AgentLoopStrategy = ({ iterationCount }) => iterationCount < 5;
 * ```
 */
export type AgentLoopStrategy = (state: AgentLoopState) => boolean;
/**
 * Options passed into the SDK and further piped to the AI provider.
 *
 * @template TProviderOptionsSuperset - Union of all provider option shapes
 * @template TProviderOptionsForModel - Provider options narrowed to the chosen model
 */
export interface TextOptions<TProviderOptionsSuperset extends Record<string, any> = Record<string, any>, TProviderOptionsForModel = TProviderOptionsSuperset> {
  /** Model identifier to generate with */
  model: string;
  /** Conversation history to send to the provider */
  messages: Array<ModelMessage>;
  /** Tools the model may call */
  tools?: Array<Tool<any, any, any>>;
  /** System prompts prepended to the conversation */
  systemPrompts?: Array<string>;
  /** Controls how many tool-use iterations the agent loop runs (see AgentLoopStrategy) */
  agentLoopStrategy?: AgentLoopStrategy;
  /**
   * Controls the randomness of the output.
   * Higher values (e.g., 0.8) make output more random, lower values (e.g., 0.2) make it more focused and deterministic.
   * Range: [0.0, 2.0]
   *
   * Note: Generally recommended to use either temperature or topP, but not both.
   *
   * Provider usage:
   * - OpenAI: `temperature` (number)
   * - Anthropic: `temperature` (number) - ranges from 0.0 to 1.0, default 1.0
   * - Gemini: `generationConfig.temperature` (number) - ranges from 0.0 to 2.0
   */
  temperature?: number;
  /**
   * Nucleus sampling parameter. An alternative to temperature sampling.
   * The model considers the results of tokens with topP probability mass.
   * For example, 0.1 means only tokens comprising the top 10% probability mass are considered.
   *
   * Note: Generally recommended to use either temperature or topP, but not both.
   *
   * Provider usage:
   * - OpenAI: `text.top_p` (number)
   * - Anthropic: `top_p` (number | null)
   * - Gemini: `generationConfig.topP` (number)
   */
  topP?: number;
  /**
   * The maximum number of tokens to generate in the response.
   *
   * Provider usage:
   * - OpenAI: `max_output_tokens` (number) - includes visible output and reasoning tokens
   * - Anthropic: `max_tokens` (number, required) - range x >= 1
   * - Gemini: `generationConfig.maxOutputTokens` (number)
   */
  maxTokens?: number;
  /**
   * Additional metadata to attach to the request.
   * Can be used for tracking, debugging, or passing custom information.
   * Structure and constraints vary by provider.
   *
   * Provider usage:
   * - OpenAI: `metadata` (Record<string, string>) - max 16 key-value pairs, keys max 64 chars, values max 512 chars
   * - Anthropic: `metadata` (Record<string, any>) - includes optional user_id (max 256 chars)
   * - Gemini: Not directly available in TextProviderOptions
   */
  metadata?: Record<string, any>;
  /** Provider/model-specific options forwarded as-is to the adapter */
  modelOptions?: TProviderOptionsForModel;
  /** Fetch Request or RequestInit overrides — NOTE(review): adapter consumption not visible here; confirm */
  request?: Request | RequestInit;
  /**
   * Schema for structured output.
   * When provided, the adapter should use the provider's native structured output API
   * to ensure the response conforms to this schema.
   * The schema will be converted to JSON Schema format before being sent to the provider.
   * Supports any Standard JSON Schema compliant library (Zod, ArkType, Valibot, etc.).
   */
  outputSchema?: SchemaInput;
  /**
   * Conversation ID for correlating client and server-side devtools events.
   * When provided, server-side events will be linked to the client conversation in devtools.
   */
  conversationId?: string;
  /**
   * AbortController for request cancellation.
   *
   * Allows you to cancel an in-progress request using an AbortController.
   * Useful for implementing timeouts or user-initiated cancellations.
   *
   * @example
   * const abortController = new AbortController();
   * setTimeout(() => abortController.abort(), 5000); // Cancel after 5 seconds
   * await chat({ ..., abortController });
   *
   * @see https://developer.mozilla.org/en-US/docs/Web/API/AbortController
   */
  abortController?: AbortController;
}
/**
 * AG-UI Protocol event types.
 * Based on the AG-UI specification for agent-user interaction.
 * Each member is the `type` discriminant of one interface in the AGUIEvent union.
 * @see https://docs.ag-ui.com/concepts/events
 */
export type AGUIEventType = 'RUN_STARTED' | 'RUN_FINISHED' | 'RUN_ERROR' | 'TEXT_MESSAGE_START' | 'TEXT_MESSAGE_CONTENT' | 'TEXT_MESSAGE_END' | 'TOOL_CALL_START' | 'TOOL_CALL_ARGS' | 'TOOL_CALL_END' | 'STEP_STARTED' | 'STEP_FINISHED' | 'MESSAGES_SNAPSHOT' | 'STATE_SNAPSHOT' | 'STATE_DELTA' | 'CUSTOM';
/**
 * Stream chunk/event types (AG-UI protocol).
 * Alias kept for API compatibility; identical to AGUIEventType.
 */
export type StreamChunkType = AGUIEventType;
/**
 * Base structure for AG-UI events.
 * Extends AG-UI spec with TanStack AI additions (model field).
 * All concrete event interfaces below extend this and narrow `type`.
 */
export interface BaseAGUIEvent {
  /** Discriminant identifying the concrete event */
  type: AGUIEventType;
  /** Event time in milliseconds (presumably epoch ms — confirm against emitter) */
  timestamp: number;
  /** Model identifier for multi-model support */
  model?: string;
  /** Original provider event for debugging/advanced use cases */
  rawEvent?: unknown;
}
/**
 * Emitted when a run starts.
 * This is the first event in any streaming response.
 * (Inherits timestamp/model/rawEvent from BaseAGUIEvent.)
 */
export interface RunStartedEvent extends BaseAGUIEvent {
  type: 'RUN_STARTED';
  /** Unique identifier for this run */
  runId: string;
  /** Optional thread/conversation ID */
  threadId?: string;
}
/**
 * Emitted when a run completes successfully.
 * Terminal event for a run (errors use RunErrorEvent instead).
 */
export interface RunFinishedEvent extends BaseAGUIEvent {
  type: 'RUN_FINISHED';
  /** Run identifier */
  runId: string;
  /** Why the generation stopped */
  finishReason: 'stop' | 'length' | 'content_filter' | 'tool_calls' | null;
  /** Token usage statistics */
  usage?: {
    promptTokens: number;
    completionTokens: number;
    totalTokens: number;
  };
}
/**
 * Emitted when an error occurs during a run.
 * Terminal event for a failed run.
 */
export interface RunErrorEvent extends BaseAGUIEvent {
  type: 'RUN_ERROR';
  /** Run identifier (if available) */
  runId?: string;
  /** Error details */
  error: {
    message: string;
    code?: string;
  };
}
/**
 * Emitted when a text message starts.
 * Followed by zero or more TEXT_MESSAGE_CONTENT events and one TEXT_MESSAGE_END.
 */
export interface TextMessageStartEvent extends BaseAGUIEvent {
  type: 'TEXT_MESSAGE_START';
  /** Unique identifier for this message */
  messageId: string;
  /** Role of the message sender */
  role: 'user' | 'assistant' | 'system' | 'tool';
}
/**
 * Emitted when text content is generated (streaming tokens).
 * Consumers append `delta`; `content` is a convenience snapshot.
 */
export interface TextMessageContentEvent extends BaseAGUIEvent {
  type: 'TEXT_MESSAGE_CONTENT';
  /** Message identifier */
  messageId: string;
  /** The incremental content token */
  delta: string;
  /** Full accumulated content so far (optional, for debugging) */
  content?: string;
}
/**
 * Emitted when a text message completes.
 * Closes the message opened by the matching TEXT_MESSAGE_START.
 */
export interface TextMessageEndEvent extends BaseAGUIEvent {
  type: 'TEXT_MESSAGE_END';
  /** Message identifier */
  messageId: string;
}
/**
 * Emitted when a tool call starts.
 * Followed by TOOL_CALL_ARGS deltas and a TOOL_CALL_END.
 */
export interface ToolCallStartEvent extends BaseAGUIEvent {
  type: 'TOOL_CALL_START';
  /** Unique identifier for this tool call */
  toolCallId: string;
  /** Name of the tool being called */
  toolName: string;
  /** ID of the parent message that initiated this tool call */
  parentMessageId?: string;
  /** Index for parallel tool calls */
  index?: number;
  /** Provider-specific metadata to carry into the ToolCall */
  providerMetadata?: Record<string, unknown>;
}
/**
 * Emitted when tool call arguments are streaming.
 * Consumers append `delta`; `args` is a convenience snapshot.
 */
export interface ToolCallArgsEvent extends BaseAGUIEvent {
  type: 'TOOL_CALL_ARGS';
  /** Tool call identifier */
  toolCallId: string;
  /** Incremental JSON arguments delta */
  delta: string;
  /** Full accumulated arguments so far */
  args?: string;
}
/**
 * Emitted when a tool call completes.
 * Closes the call opened by the matching TOOL_CALL_START.
 */
export interface ToolCallEndEvent extends BaseAGUIEvent {
  type: 'TOOL_CALL_END';
  /** Tool call identifier */
  toolCallId: string;
  /** Name of the tool */
  toolName: string;
  /** Final parsed input arguments */
  input?: unknown;
  /** Tool execution result (if executed) */
  result?: string;
}
/**
 * Emitted when a thinking/reasoning step starts.
 * Paired with a STEP_FINISHED carrying the same stepId.
 */
export interface StepStartedEvent extends BaseAGUIEvent {
  type: 'STEP_STARTED';
  /** Unique identifier for this step */
  stepId: string;
  /** Type of step (e.g., 'thinking', 'planning') */
  stepType?: string;
}
/**
 * Emitted when a thinking/reasoning step finishes.
 * Note: unlike the TEXT_MESSAGE events there is no separate content event;
 * the step's thinking text is delivered here via `delta`/`content`.
 */
export interface StepFinishedEvent extends BaseAGUIEvent {
  type: 'STEP_FINISHED';
  /** Step identifier */
  stepId: string;
  /** Incremental thinking content */
  delta: string;
  /** Full accumulated thinking content (optional, for debugging) */
  content?: string;
}
/**
 * Emitted to provide a snapshot of all messages in a conversation.
 *
 * Unlike StateSnapshot (which carries arbitrary application state),
 * MessagesSnapshot specifically delivers the conversation transcript.
 * This is a first-class AG-UI event type.
 */
export interface MessagesSnapshotEvent extends BaseAGUIEvent {
  type: 'MESSAGES_SNAPSHOT';
  /** Complete array of messages in the conversation (UI format) */
  messages: Array<UIMessage>;
}
/**
 * Emitted to provide a full state snapshot.
 * Replaces any previously communicated state; incremental updates use STATE_DELTA.
 */
export interface StateSnapshotEvent extends BaseAGUIEvent {
  type: 'STATE_SNAPSHOT';
  /** The complete state object */
  state: Record<string, unknown>;
}
/**
 * Emitted to provide an incremental state update.
 * Applied on top of the last STATE_SNAPSHOT
 * (delta semantics — merge vs. patch — not specified here; confirm against emitter).
 */
export interface StateDeltaEvent extends BaseAGUIEvent {
  type: 'STATE_DELTA';
  /** The state changes to apply */
  delta: Record<string, unknown>;
}
/**
 * Custom event for extensibility (see ToolExecutionContext.emitCustomEvent).
 * NOTE(review): shadows the DOM global `CustomEvent` when imported unqualified.
 */
export interface CustomEvent extends BaseAGUIEvent {
  type: 'CUSTOM';
  /** Custom event name */
  name: string;
  /** Custom event value */
  value?: unknown;
}
/**
 * Union of all AG-UI events.
 * Discriminated on the `type` field; narrow with a switch over AGUIEventType.
 */
export type AGUIEvent = RunStartedEvent | RunFinishedEvent | RunErrorEvent | TextMessageStartEvent | TextMessageContentEvent | TextMessageEndEvent | ToolCallStartEvent | ToolCallArgsEvent | ToolCallEndEvent | StepStartedEvent | StepFinishedEvent | MessagesSnapshotEvent | StateSnapshotEvent | StateDeltaEvent | CustomEvent;
/**
 * Chunk returned by the SDK during streaming chat completions.
 * Uses the AG-UI protocol event format (alias of AGUIEvent).
 */
export type StreamChunk = AGUIEvent;
/**
 * A chunk of a (non-chat) text completion response.
 */
export interface TextCompletionChunk {
  /** Unique identifier for the completion */
  id: string;
  /** Model that produced the chunk */
  model: string;
  /** Text content of this chunk */
  content: string;
  /** Role of the producer (always 'assistant' when present) */
  role?: 'assistant';
  /** Why generation stopped, once known */
  finishReason?: 'stop' | 'length' | 'content_filter' | null;
  /** Token usage statistics, if reported */
  usage?: {
    promptTokens: number;
    completionTokens: number;
    totalTokens: number;
  };
}
/**
 * Options for text summarization.
 */
export interface SummarizationOptions {
  /** The model to use for summarization */
  model: string;
  /** The text to summarize */
  text: string;
  /** Maximum length of the summary (unit not specified here — confirm: chars vs tokens) */
  maxLength?: number;
  /** Output style of the summary */
  style?: 'bullet-points' | 'paragraph' | 'concise';
  /** Topics/aspects the summary should focus on */
  focus?: Array<string>;
}
/**
 * Result of text summarization.
 */
export interface SummarizationResult {
  /** Unique identifier for the summarization */
  id: string;
  /** Model used for summarization */
  model: string;
  /** The generated summary text */
  summary: string;
  /** Token usage statistics */
  usage: {
    promptTokens: number;
    completionTokens: number;
    totalTokens: number;
  };
}
/**
 * Options for image generation.
 * These are the common options supported across providers.
 * @template TProviderOptions - Provider-specific options shape
 * @template TSize - Allowed size strings for the chosen provider/model
 */
export interface ImageGenerationOptions<TProviderOptions extends object = object, TSize extends string = string> {
  /** The model to use for image generation */
  model: string;
  /** Text description of the desired image(s) */
  prompt: string;
  /** Number of images to generate (default: 1) */
  numberOfImages?: number;
  /** Image size in WIDTHxHEIGHT format (e.g., "1024x1024") */
  size?: TSize;
  /** Model-specific options for image generation */
  modelOptions?: TProviderOptions;
}
/**
 * A single generated image.
 * At least one of b64Json/url is expected to be populated — depends on provider; confirm.
 */
export interface GeneratedImage {
  /** Base64-encoded image data */
  b64Json?: string;
  /** URL to the generated image (may be temporary) */
  url?: string;
  /** Revised prompt used by the model (if applicable) */
  revisedPrompt?: string;
}
/**
 * Result of image generation.
 */
export interface ImageGenerationResult {
  /** Unique identifier for the generation */
  id: string;
  /** Model used for generation */
  model: string;
  /** Array of generated images */
  images: Array<GeneratedImage>;
  /** Token usage information (if available) */
  usage?: {
    inputTokens?: number;
    outputTokens?: number;
    totalTokens?: number;
  };
}
/**
 * Options for video generation.
 * These are the common options supported across providers.
 * @template TProviderOptions - Provider-specific options shape
 * @template TSize - Allowed size/aspect strings for the chosen provider
 *
 * @experimental Video generation is an experimental feature and may change.
 */
export interface VideoGenerationOptions<TProviderOptions extends object = object, TSize extends string = string> {
  /** The model to use for video generation */
  model: string;
  /** Text description of the desired video */
  prompt: string;
  /** Video size — format depends on the provider (e.g., "16:9", "1280x720") */
  size?: TSize;
  /** Video duration in seconds */
  duration?: number;
  /** Model-specific options for video generation */
  modelOptions?: TProviderOptions;
}
/**
 * Result of creating a video generation job.
 * Generation is asynchronous: poll with the jobId (see VideoStatusResult).
 *
 * @experimental Video generation is an experimental feature and may change.
 */
export interface VideoJobResult {
  /** Unique job identifier for polling status */
  jobId: string;
  /** Model used for generation */
  model: string;
}
/**
 * Status of a video generation job.
 * When status is 'completed', fetch the output via VideoUrlResult.
 *
 * @experimental Video generation is an experimental feature and may change.
 */
export interface VideoStatusResult {
  /** Job identifier */
  jobId: string;
  /** Current status of the job */
  status: 'pending' | 'processing' | 'completed' | 'failed';
  /** Progress percentage (0-100), if available */
  progress?: number;
  /** Error message if status is 'failed' */
  error?: string;
}
/**
 * Result containing the URL to a generated video.
 *
 * @experimental Video generation is an experimental feature and may change.
 */
export interface VideoUrlResult {
  /** Job identifier */
  jobId: string;
  /** URL to the generated video */
  url: string;
  /** When the URL expires, if applicable */
  expiresAt?: Date;
}
/**
 * Options for text-to-speech generation.
 * These are the common options supported across providers.
 * @template TProviderOptions - Provider-specific options shape
 */
export interface TTSOptions<TProviderOptions extends object = object> {
  /** The model to use for TTS generation */
  model: string;
  /** The text to convert to speech */
  text: string;
  /** The voice to use for generation */
  voice?: string;
  /** The output audio format */
  format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';
  /** The speed of the generated audio (0.25 to 4.0) */
  speed?: number;
  /** Model-specific options for TTS generation */
  modelOptions?: TProviderOptions;
}
/**
 * Result of text-to-speech generation.
 */
export interface TTSResult {
  /** Unique identifier for the generation */
  id: string;
  /** Model used for generation */
  model: string;
  /** Base64-encoded audio data */
  audio: string;
  /** Audio format of the generated audio (see TTSOptions.format for typical values) */
  format: string;
  /** Duration of the audio in seconds, if available */
  duration?: number;
  /** Content type of the audio (e.g., 'audio/mp3') */
  contentType?: string;
}
/**
 * Options for audio transcription.
 * These are the common options supported across providers.
 * @template TProviderOptions - Provider-specific options shape
 */
export interface TranscriptionOptions<TProviderOptions extends object = object> {
  /** The model to use for transcription */
  model: string;
  /** The audio data to transcribe - can be base64 string, File, Blob, or Buffer */
  audio: string | File | Blob | ArrayBuffer;
  /** The language of the audio in ISO-639-1 format (e.g., 'en') */
  language?: string;
  /** An optional prompt to guide the transcription */
  prompt?: string;
  /** The format of the transcription output */
  responseFormat?: 'json' | 'text' | 'srt' | 'verbose_json' | 'vtt';
  /** Model-specific options for transcription */
  modelOptions?: TProviderOptions;
}
/**
 * A single segment of transcribed audio with timing information.
 */
export interface TranscriptionSegment {
  /** Unique identifier for the segment */
  id: number;
  /** Start time of the segment in seconds */
  start: number;
  /** End time of the segment in seconds */
  end: number;
  /** Transcribed text for this segment */
  text: string;
  /** Confidence score (0-1), if available */
  confidence?: number;
  /** Speaker identifier, if diarization is enabled */
  speaker?: string;
}
/**
 * A single word with timing information.
 */
export interface TranscriptionWord {
  /** The transcribed word */
  word: string;
  /** Start time in seconds */
  start: number;
  /** End time in seconds */
  end: number;
}
/**
 * Result of audio transcription.
 */
export interface TranscriptionResult {
  /** Unique identifier for the transcription */
  id: string;
  /** Model used for transcription */
  model: string;
  /** The full transcribed text */
  text: string;
  /** Language detected or specified */
  language?: string;
  /** Duration of the audio in seconds */
  duration?: number;
  /** Detailed segments with timing, if available */
  segments?: Array<TranscriptionSegment>;
  /** Word-level timestamps, if available */
  words?: Array<TranscriptionWord>;
}
/**
 * Default metadata type for adapters that don't define custom metadata.
 * Uses unknown for all modalities (keys mirror the Modality union).
 */
export interface DefaultMessageMetadataByModality {
  text: unknown;
  image: unknown;
  audio: unknown;
  video: unknown;
  document: unknown;
}