@openai/agents-realtime
Version:
The OpenAI Agents SDK is a lightweight yet powerful framework for building multi-agent workflows. This package contains the logic for building realtime voice agents on the server or in the browser.
152 lines (151 loc) • 5.57 kB
TypeScript
import { JsonObjectSchema, ModelSettingsToolChoice, Prompt } from '@openai/agents-core/types';
/**
 * Loosely typed message object: a string `type` is the only required field,
 * and any other property may be attached with an arbitrary value.
 */
export type RealtimeClientMessage = {
  [key: string]: any;
  type: string;
};
/**
 * User input, given either as a plain string or as a structured message
 * object with `type: 'message'` and `role: 'user'`.
 *
 * In the structured form, `content` is an array whose items are either
 * `input_text` parts (carrying `text`) or `input_image` parts (carrying
 * `image` and optional `providerData`).
 */
export type RealtimeUserInput =
  | string
  | {
      type: 'message';
      role: 'user';
      content: Array<
        | {
            type: 'input_text';
            text: string;
          }
        | {
            type: 'input_image';
            image: string;
            providerData?: Record<string, any>;
          }
      >;
    };
/**
 * Object form of an audio format, discriminated by `type`.
 *
 * Only the `audio/pcm` variant carries a `rate` (presumably the sample rate —
 * confirm units); `audio/pcmu` and `audio/pcma` have no further fields.
 */
export type RealtimeAudioFormatDefinition =
  | {
      type: 'audio/pcm';
      rate: number;
    }
  | { type: 'audio/pcmu' }
  | { type: 'audio/pcma' };
/**
 * String shorthand for an audio format. The known values are listed as
 * literals; `(string & {})` additionally admits any other string while
 * keeping those literals available to editor completion.
 *
 * @deprecated String shorthands are deprecated; use an object format such as
 * `{ type: 'audio/pcm' }` instead.
 */
export type RealtimeAudioFormatLegacy =
  | 'pcm16'
  | 'g711_ulaw'
  | 'g711_alaw'
  | (string & {});
/**
 * Any accepted audio format: either a legacy string shorthand or an object
 * definition.
 */
export type RealtimeAudioFormat =
  | RealtimeAudioFormatLegacy
  | RealtimeAudioFormatDefinition;
/**
 * Tracing configuration: either the literal `'auto'` or an object whose
 * fields (`workflow_name`, `group_id`, `metadata`) are all optional.
 */
export type RealtimeTracingConfig =
  | 'auto'
  | {
      workflow_name?: string;
      group_id?: string;
      metadata?: Record<string, any>;
    };
/**
 * Noise-reduction setting for input audio. `type` is required; the listed
 * literals are the known values and `(string & {})` admits any other string.
 */
export type RealtimeInputAudioNoiseReductionConfig = {
  type:
    | 'near_field'
    | 'far_field'
    | (string & {});
};
/**
 * Transcription settings for input audio. Every field is optional.
 *
 * `model` lists known model names as literals; `(string & {})` admits any
 * other string as well.
 */
export type RealtimeInputAudioTranscriptionConfig = {
  model?:
    | 'gpt-4o-transcribe'
    | 'gpt-4o-mini-transcribe'
    | 'whisper-1'
    | (string & {});
  language?: string;
  prompt?: string;
};
/**
 * Turn-detection settings written with snake_case keys. Every field is
 * optional.
 *
 * `type` lists `semantic_vad` and `server_vad` as known values and accepts any
 * other string through `(string & {})`.
 */
export type RealtimeTurnDetectionConfigAsIs = {
  type?:
    | 'semantic_vad'
    | 'server_vad'
    | (string & {});
  eagerness?: 'auto' | 'low' | 'medium' | 'high';
  threshold?: number;
  create_response?: boolean;
  interrupt_response?: boolean;
  prefix_padding_ms?: number;
  silence_duration_ms?: number;
  idle_timeout_ms?: number;
};
/**
 * Turn-detection settings written with camelCase keys. Every field is
 * optional.
 *
 * `type` lists `semantic_vad` and `server_vad` as known values and accepts any
 * other string through `(string & {})`.
 */
export type RealtimeTurnDetectionConfigCamelCase = {
  type?:
    | 'semantic_vad'
    | 'server_vad'
    | (string & {});
  eagerness?: 'auto' | 'low' | 'medium' | 'high';
  threshold?: number;
  createResponse?: boolean;
  interruptResponse?: boolean;
  prefixPaddingMs?: number;
  silenceDurationMs?: number;
  idleTimeoutMs?: number;
};
/**
 * Turn-detection settings in either key style (snake_case or camelCase).
 * The intersection with `Record<string, any>` additionally allows arbitrary
 * extra properties.
 */
export type RealtimeTurnDetectionConfig = (
  | RealtimeTurnDetectionConfigAsIs
  | RealtimeTurnDetectionConfigCamelCase
) &
  Record<string, any>;
/**
 * Input-side audio settings. Every field is optional; `noiseReduction` may
 * also be set to `null` explicitly.
 */
export type RealtimeAudioInputConfig = {
  format?: RealtimeAudioFormat;
  turnDetection?: RealtimeTurnDetectionConfig;
  transcription?: RealtimeInputAudioTranscriptionConfig;
  noiseReduction?: RealtimeInputAudioNoiseReductionConfig | null;
};
/**
 * Output-side audio settings: optional `format`, `voice` and `speed`.
 */
export type RealtimeAudioOutputConfig = {
  voice?: string;
  speed?: number;
  format?: RealtimeAudioFormat;
};
/**
 * Groups the audio settings into optional `input` and `output` sections.
 */
export type RealtimeAudioConfig = {
  output?: RealtimeAudioOutputConfig;
  input?: RealtimeAudioInputConfig;
};
/**
 * Fields shared by both session config shapes.
 *
 * `model`, `instructions`, `toolChoice` and `tools` are required on this
 * type; `tracing` (which may also be `null`), `providerData` and `prompt`
 * are optional.
 */
export type RealtimeSessionConfigCommon = {
  model: string;
  instructions: string;
  tools: Array<RealtimeToolDefinition>;
  toolChoice: ModelSettingsToolChoice;
  prompt?: Prompt;
  tracing?: RealtimeTracingConfig | null;
  providerData?: Record<string, any>;
};
/**
 * Current session config shape: the common fields plus optional
 * `outputModalities`, a nested `audio` section, and a top-level `voice`.
 */
export type RealtimeSessionConfigDefinition = RealtimeSessionConfigCommon & {
  outputModalities?: Array<'text' | 'audio'>;
  audio?: RealtimeAudioConfig;
  /**
   * TODO: This property is expected to be replaced by audio.output.voice.
   * It stays at the top level for backwards compatibility until every
   * session implementation has migrated to audio.output.voice.
   */
  voice?: string;
};
/**
 * Older, flat session config shape: the common fields plus top-level audio
 * settings. Each field declared here is required on this type and is tagged
 * as deprecated in favour of a replacement on the newer shape.
 */
export type RealtimeSessionConfigDeprecated = RealtimeSessionConfigCommon & {
  /** @deprecated Use outputModalities instead. */
  modalities: Array<'text' | 'audio'>;

  /** @deprecated Use audio.output.voice instead. */
  voice: string;

  /** @deprecated Use audio.input.format instead. */
  inputAudioFormat: RealtimeAudioFormatLegacy;

  /** @deprecated Use audio.output.format instead. */
  outputAudioFormat: RealtimeAudioFormatLegacy;

  /** @deprecated Use audio.input.transcription instead. */
  inputAudioTranscription: RealtimeInputAudioTranscriptionConfig;

  /** @deprecated Use audio.input.turnDetection instead. */
  turnDetection: RealtimeTurnDetectionConfig;

  /** @deprecated Use audio.input.noiseReduction instead. */
  inputAudioNoiseReduction: RealtimeInputAudioNoiseReductionConfig | null;

  /** @deprecated Use audio.output.speed instead. */
  speed: number;
};
/**
 * A session config in either the current shape or the deprecated flat shape.
 */
export type RealtimeSessionConfig =
  | RealtimeSessionConfigDefinition
  | RealtimeSessionConfigDeprecated;
/**
 * Convert any given config (old or new) to the new GA config shape.
 * If a new config is provided, it will be returned as-is (normalized shallowly).
 *
 * This is an ambient declaration only; the implementation is not part of
 * this file.
 *
 * @param config - A partial session config in either the current
 * (`RealtimeSessionConfigDefinition`) or the deprecated
 * (`RealtimeSessionConfigDeprecated`) shape.
 * @returns A partial config typed as `RealtimeSessionConfigDefinition`.
 */
export declare function toNewSessionConfig(config: Partial<RealtimeSessionConfig>): Partial<RealtimeSessionConfigDefinition>;
/**
 * Declared to take an optional audio format — a legacy string shorthand or an
 * object definition — and return the object form, or `undefined`.
 *
 * NOTE(review): only the signature is visible here; presumably `undefined` is
 * returned when no format is supplied and legacy strings are mapped to their
 * object equivalents — confirm against the implementation.
 *
 * @param format - Format to normalize; may be omitted.
 * @returns A `RealtimeAudioFormatDefinition`, or `undefined`.
 */
export declare function normalizeAudioFormat(format?: RealtimeAudioFormat | undefined): RealtimeAudioFormatDefinition | undefined;
/**
 * Tool definition with `type: 'function'`. All fields are required: a `name`,
 * a `description`, a JSON object schema for `parameters`, and a `strict`
 * flag.
 */
export type FunctionToolDefinition = {
  type: 'function';
  name: string;
  description: string;
  strict: boolean;
  parameters: JsonObjectSchema<any>;
};
/**
 * Filter object holding an optional list of tool names.
 */
export type HostedToolFilter = {
  tool_names?: Array<string>;
};
/**
 * Tool definition with `type: 'mcp'`. Only `server_label` is required besides
 * the tag.
 *
 * `allowed_tools` is either a list of names or a `HostedToolFilter`.
 * `require_approval` is either `'never'`, `'always'`, or an object with
 * optional `never` / `always` filters.
 */
export type HostedMCPToolDefinition = {
  type: 'mcp';
  server_label: string;
  server_url?: string;
  headers?: Record<string, string>;
  allowed_tools?: Array<string> | HostedToolFilter;
  require_approval?:
    | 'never'
    | 'always'
    | {
        never?: HostedToolFilter;
        always?: HostedToolFilter;
      };
};
/**
 * Any tool definition accepted in a session config: a function tool or a
 * hosted MCP tool, distinguished by `type`.
 */
export type RealtimeToolDefinition =
  | FunctionToolDefinition
  | HostedMCPToolDefinition;
/**
 * Description of an MCP tool: a required `name`, an optional `description`
 * and `input_schema`, plus any additional properties.
 */
export type RealtimeMcpToolInfo = {
  [key: string]: any;
  name: string;
  description?: string;
  input_schema?: Record<string, any>;
};