UNPKG

@openai/agents-realtime

Version:

The OpenAI Agents SDK is a lightweight yet powerful framework for building multi-agent workflows. This package contains the logic for building realtime voice agents on the server or in the browser.

223 lines (222 loc) 8.59 kB
import { RunContext, Usage, RunToolApprovalItem } from '@openai/agents-core';
import { RuntimeEventEmitter } from '@openai/agents-core/_shims';
import type { RealtimeSessionConfig, RealtimeUserInput, RealtimeMcpToolInfo } from './clientMessages';
import { RealtimeOutputGuardrail, RealtimeOutputGuardrailSettings } from './guardrail';
import { RealtimeItem } from './items';
import { OpenAIRealtimeModels } from './openaiRealtimeBase';
import { RealtimeAgent } from './realtimeAgent';
import { RealtimeSessionEventTypes } from './realtimeSessionEvents';
import type { ApiKey, RealtimeTransportLayer } from './transportLayer';

/**
 * Context data handed to the agent during a realtime session: the caller-supplied context
 * intersected with a `history` array. The RealtimeSession automatically adds the current
 * snapshot of the history to the context.
 */
export type RealtimeContextData<TContext = unknown> = TContext & {
  history: RealtimeItem[];
};

/**
 * Options accepted when constructing a `RealtimeSession`.
 */
export type RealtimeSessionOptions<TContext = unknown> = {
  /**
   * API key used for the connection. A function may be supplied to load the key lazily.
   */
  apiKey: ApiKey;

  /**
   * Transport layer to use: `'webrtc'`, `'websocket'`, or a `RealtimeTransportLayer` value.
   */
  transport: 'webrtc' | 'websocket' | RealtimeTransportLayer;

  /**
   * Model to use.
   */
  model?: OpenAIRealtimeModels | (string & {});

  /**
   * Additional context passed to the agent.
   */
  context?: TContext;

  /**
   * Output guardrails applied to the agent output in parallel.
   */
  outputGuardrails?: RealtimeOutputGuardrail[];

  /**
   * Settings that configure how the guardrails behave.
   */
  outputGuardrailSettings?: RealtimeOutputGuardrailSettings;

  /**
   * Additional session config options. These override the default client options.
   */
  config?: Partial<RealtimeSessionConfig>;

  /**
   * Whether the history copy keeps a local copy of the audio data. Audio is left out of the
   * history by default to save runtime memory on the client; enable this to retain it.
   */
  historyStoreAudio?: boolean;

  /**
   * Whether tracing is disabled for this session. When disabled, the agent run is not traced.
   */
  tracingDisabled?: boolean;

  /**
   * Group identifier used for tracing so that multiple traces can be linked together, e.g. to
   * connect RealtimeSession traces with those of a backend text-based agent run.
   */
  groupId?: string;

  /**
   * Optional dictionary of additional metadata to include with the trace.
   */
  traceMetadata?: Record<string, any>;

  /**
   * Workflow name used for tracing.
   */
  workflowName?: string;

  /**
   * Whether a response is triggered automatically for MCP tool calls.
   */
  automaticallyTriggerResponseForMcpToolCalls?: boolean;
};

/**
 * Options accepted by `RealtimeSession.connect`.
 */
export type RealtimeSessionConnectOptions = {
  /**
   * API key used for the connection. A function may be supplied to load the key lazily.
   * Overrides default client options.
   */
  apiKey: string | (() => string | Promise<string>);

  /**
   * Model to use for the connection.
   */
  model?: OpenAIRealtimeModels | (string & {});

  /**
   * URL to use for the connection.
   */
  url?: string;
};

/**
 * `RealtimeSession` is the cornerstone for building Voice Agents. It plays the role a Runner
 * plays for text-based agents, except that it handles multiple turns automatically by keeping a
 * connection open with the underlying transport layer.
 *
 * The session manages the local history copy, executes tools, runs output guardrails, and
 * facilitates handoffs.
 *
 * Audio handling and generation of model responses are done by the underlying transport layer.
 * By default, in a browser with WebRTC support, the session uses the WebRTC version of the
 * OpenAI Realtime API. On the server, or when `websocket` is passed as the transport layer, the
 * session connects using WebSockets.
 *
 * With WebRTC in the browser, the transport layer also configures the microphone and audio
 * output used by the session automatically.
 *
 * A transport layer instance can also be created by the caller and passed in, for more control
 * over the configuration or to extend the existing ones. See `TwilioRealtimeTransportLayer` for
 * an example of a custom transport layer.
 *
 * @example
 * ```ts
 * const agent = new RealtimeAgent({
 *   name: 'my-agent',
 *   instructions: 'You are a helpful assistant that can answer questions and help with tasks.',
 * });
 *
 * const session = new RealtimeSession(agent);
 * await session.connect({
 *   apiKey: 'your-api-key',
 * });
 * ```
 */
export declare class RealtimeSession<TBaseContext = unknown> extends RuntimeEventEmitter<
  RealtimeSessionEventTypes<TBaseContext>
> {
  #private;

  /**
   * The agent supplied to the constructor.
   */
  readonly initialAgent:
    | RealtimeAgent<TBaseContext>
    | RealtimeAgent<RealtimeContextData<TBaseContext>>;

  /**
   * The options supplied to the constructor; every field is optional here.
   */
  readonly options: Partial<RealtimeSessionOptions<TBaseContext>>;

  /**
   * @param initialAgent - The agent the session is created with.
   * @param options - Optional session options.
   */
  constructor(
    initialAgent:
      | RealtimeAgent<TBaseContext>
      | RealtimeAgent<RealtimeContextData<TBaseContext>>,
    options?: Partial<RealtimeSessionOptions<TBaseContext>>,
  );

  /**
   * The transport layer the session uses.
   */
  get transport(): RealtimeTransportLayer;

  /**
   * The agent that is currently active in the session.
   */
  get currentAgent():
    | RealtimeAgent<TBaseContext>
    | RealtimeAgent<RealtimeContextData<TBaseContext>>;

  /**
   * The session's current usage.
   */
  get usage(): Usage;

  /**
   * The session's current context.
   */
  get context(): RunContext<RealtimeContextData<TBaseContext>>;

  /**
   * Whether the session is muted. May be `null` when the underlying transport layer does not
   * support muting.
   */
  get muted(): boolean | null;

  /**
   * The session's history.
   */
  get history(): RealtimeItem[];

  /**
   * MCP tool info entries exposed by the session.
   * NOTE(review): undocumented upstream; presumably the MCP tools currently available to the
   * session - confirm against the implementation.
   */
  get availableMcpTools(): RealtimeMcpToolInfo[];

  /**
   * NOTE(review): undocumented upstream; presumably switches the session to `newAgent` and
   * resolves with that agent - confirm against the implementation.
   *
   * @param newAgent - The agent to update the session with.
   */
  updateAgent(newAgent: RealtimeAgent<TBaseContext>): Promise<RealtimeAgent<TBaseContext>>;

  /**
   * Connect to the session. This establishes the connection to the underlying transport layer
   * and starts the session.
   *
   * After connecting, the session also emits a `history_updated` event with an empty history.
   *
   * @param options - The options for the connection.
   */
  connect(options: RealtimeSessionConnectOptions): Promise<void>;

  /**
   * Update the session's history.
   *
   * @param newHistory - The new history to set, or a function that receives a history array and
   * returns the new one.
   */
  updateHistory(
    newHistory: RealtimeItem[] | ((history: RealtimeItem[]) => RealtimeItem[]),
  ): void;

  /**
   * Send a message to the session.
   *
   * @param message - The message to send.
   * @param otherEventData - Additional event data to send.
   */
  sendMessage(message: RealtimeUserInput, otherEventData?: Record<string, any>): void;

  /**
   * Add an image to the session.
   *
   * @param image - The image to add.
   */
  addImage(
    image: string,
    { triggerResponse }?: {
      triggerResponse?: boolean;
    },
  ): void;

  /**
   * Mute the session.
   *
   * @param muted - Whether to mute the session.
   */
  mute(muted: boolean): void;

  /**
   * Disconnect from the session.
   */
  close(): void;

  /**
   * Send audio to the session.
   *
   * @param audio - The audio to send.
   * @param options - Additional options.
   * @param options.commit - Whether to finish the turn with this audio.
   */
  sendAudio(
    audio: ArrayBuffer,
    options?: {
      commit?: boolean;
    },
  ): void;

  /**
   * Artificially interrupt the session, for example to build a "stop talking" button.
   */
  interrupt(): void;

  /**
   * Approve a tool call. This also triggers the tool call to the agent.
   *
   * @param approvalItem - The approval item to approve.
   * @param options - Additional options.
   * @param options.alwaysApprove - Whether to always approve the tool call.
   */
  approve(
    approvalItem: RunToolApprovalItem,
    options?: {
      alwaysApprove?: boolean;
    },
  ): Promise<void>;

  /**
   * Reject a tool call. This also triggers the tool call to the agent.
   * NOTE(review): the "triggers the tool call" wording mirrors `approve` - confirm it is
   * accurate for rejections.
   *
   * @param approvalItem - The approval item to reject.
   * @param options - Additional options.
   * @param options.alwaysReject - Whether to always reject the tool call.
   */
  reject(
    approvalItem: RunToolApprovalItem,
    options?: {
      alwaysReject?: boolean;
    },
  ): Promise<void>;
}