UNPKG

@tanstack/ai

Version:

Core TanStack AI library - Open source AI SDK

235 lines (234 loc) 7.02 kB
/**
 * Tuning parameters for voice activity detection (VAD).
 */
export interface VADConfig {
  /** Sensitivity threshold, in the range 0.0-1.0. */
  threshold?: number;
  /** Amount of audio (ms) to include from before speech was detected. */
  prefixPaddingMs?: number;
  /** Length of silence (ms) that ends the turn. */
  silenceDurationMs?: number;
}

/**
 * Serializable description of a tool, used when configuring a realtime session.
 * Holds only the metadata providers need; Zod schemas and execute functions
 * are not part of it.
 */
export interface RealtimeToolConfig {
  name: string;
  description: string;
  inputSchema?: Record<string, any>;
}

/**
 * Settings for a realtime session.
 */
export interface RealtimeSessionConfig {
  /** Model the session should use. */
  model?: string;
  /** Voice used for audio output. */
  voice?: string;
  /** System instructions given to the assistant. */
  instructions?: string;
  /** Tools the session may use. */
  tools?: Array<RealtimeToolConfig>;
  /** VAD mode. */
  vadMode?: 'server' | 'semantic' | 'manual';
  /** VAD configuration. */
  vadConfig?: VADConfig;
  /** Response output modalities, e.g. ['audio', 'text'] or ['text']. */
  outputModalities?: Array<'audio' | 'text'>;
  /** Generation temperature; the valid range is provider-specific (e.g. 0.6-1.2 for OpenAI). */
  temperature?: number;
  /** Upper bound on the number of tokens in a response. */
  maxOutputTokens?: number | 'inf';
  /** Eagerness level for semantic VAD. */
  semanticEagerness?: 'low' | 'medium' | 'high';
  /** Options specific to the provider. */
  providerOptions?: Record<string, any>;
}

/**
 * Token the server returns so that a client can authenticate.
 */
export interface RealtimeToken {
  /** Identifier of the provider. */
  provider: string;
  /** Value of the ephemeral token. */
  token: string;
  /** When the token expires, as milliseconds since the epoch. */
  expiresAt: number;
  /** Session configuration embedded in the token. */
  config: RealtimeSessionConfig;
}

/**
 * Adapter contract for producing provider-specific tokens.
 */
export interface RealtimeTokenAdapter {
  /** Identifier of the provider. */
  provider: string;
  /** Produces an ephemeral token intended for client use. */
  generateToken: () => Promise<RealtimeToken>;
}

/**
 * Options accepted by the realtimeToken function.
 */
export interface RealtimeTokenOptions {
  /** Token adapter to use. */
  adapter: RealtimeTokenAdapter;
}

/**
 * Realtime message part carrying text.
 */
export interface RealtimeTextPart {
  type: 'text';
  content: string;
}

/**
 * Realtime message part carrying audio.
 */
export interface RealtimeAudioPart {
  type: 'audio';
  /** Transcription of the audio. */
  transcript: string;
  /** Raw audio data; optional, present only if stored. */
  audioData?: ArrayBuffer;
  /** Audio duration in milliseconds. */
  durationMs?: number;
}

/**
 * Realtime message part describing a tool call.
 */
export interface RealtimeToolCallPart {
  type: 'tool-call';
  id: string;
  name: string;
  arguments: string;
  input?: unknown;
  output?: unknown;
}

/**
 * Realtime message part describing a tool result.
 */
export interface RealtimeToolResultPart {
  type: 'tool-result';
  toolCallId: string;
  content: string;
}

/**
 * Realtime message part carrying an image.
 */
export interface RealtimeImagePart {
  type: 'image';
  /** Either base64-encoded image data or a URL. */
  data: string;
  /** Image MIME type, e.g. 'image/png' or 'image/jpeg'. */
  mimeType: string;
}

/**
 * Any part that can appear in a realtime message; discriminated by `type`.
 */
export type RealtimeMessagePart =
  | RealtimeTextPart
  | RealtimeAudioPart
  | RealtimeToolCallPart
  | RealtimeToolResultPart
  | RealtimeImagePart;

/**
 * One message within a realtime conversation.
 */
export interface RealtimeMessage {
  /** Unique identifier of the message. */
  id: string;
  /** Role of the message. */
  role: 'user' | 'assistant';
  /** Timestamp of the message's creation. */
  timestamp: number;
  /** Parts that make up the message content. */
  parts: Array<RealtimeMessagePart>;
  /** Set when the message was interrupted. */
  interrupted?: boolean;
  /** Reference to the audio buffer, if one was stored. */
  audioId?: string;
  /** Audio duration in milliseconds. */
  durationMs?: number;
}

/**
 * Connection status of the realtime client.
 */
export type RealtimeStatus =
  | 'idle'
  | 'connecting'
  | 'connected'
  | 'reconnecting'
  | 'error';

/**
 * Mode the realtime session is currently in.
 */
export type RealtimeMode =
  | 'idle'
  | 'listening'
  | 'thinking'
  | 'speaking';

/**
 * Accessors for audio visualization data.
 */
export interface AudioVisualization {
  /** Input volume level, normalized to 0-1. */
  readonly inputLevel: number;
  /** Output volume level, normalized to 0-1. */
  readonly outputLevel: number;
  /** Returns frequency data for visualizing input audio. */
  getInputFrequencyData: () => Uint8Array;
  /** Returns frequency data for visualizing output audio. */
  getOutputFrequencyData: () => Uint8Array;
  /** Returns time domain data for the input waveform. */
  getInputTimeDomainData: () => Uint8Array;
  /** Returns time domain data for the output waveform. */
  getOutputTimeDomainData: () => Uint8Array;
  /** Sample rate of the input. */
  readonly inputSampleRate: number;
  /** Sample rate of the output. */
  readonly outputSampleRate: number;
  /**
   * Subscribes a callback to raw input audio samples.
   * The subscribe call itself returns a zero-argument function.
   */
  onInputAudio?: (
    callback: (samples: Float32Array, sampleRate: number) => void,
  ) => () => void;
  /**
   * Subscribes a callback to raw output audio samples.
   * The subscribe call itself returns a zero-argument function.
   */
  onOutputAudio?: (
    callback: (samples: Float32Array, sampleRate: number) => void,
  ) => () => void;
}

/**
 * Names of the events the realtime connection emits.
 */
export type RealtimeEvent =
  | 'status_change'
  | 'mode_change'
  | 'transcript'
  | 'audio_chunk'
  | 'tool_call'
  | 'message_complete'
  | 'interrupted'
  | 'error';

/**
 * Payload shape for each realtime event, keyed by event name.
 */
export interface RealtimeEventPayloads {
  status_change: {
    status: RealtimeStatus;
  };
  mode_change: {
    mode: RealtimeMode;
  };
  transcript: {
    role: 'user' | 'assistant';
    transcript: string;
    isFinal: boolean;
  };
  audio_chunk: {
    data: ArrayBuffer;
    sampleRate: number;
  };
  tool_call: {
    toolCallId: string;
    toolName: string;
    input: unknown;
  };
  message_complete: {
    message: RealtimeMessage;
  };
  interrupted: {
    messageId?: string;
  };
  error: {
    error: Error;
  };
}

/**
 * Callback for a realtime event; it receives the payload that
 * RealtimeEventPayloads declares for `TEvent`.
 */
export type RealtimeEventHandler<TEvent extends RealtimeEvent> = (
  payload: RealtimeEventPayloads[TEvent],
) => void;

/**
 * Codes that classify realtime errors.
 */
export type RealtimeErrorCode =
  | 'TOKEN_EXPIRED'
  | 'CONNECTION_FAILED'
  | 'PERMISSION_DENIED'
  | 'PROVIDER_ERROR'
  | 'UNKNOWN';

/**
 * Error extended with realtime-specific information.
 */
export interface RealtimeError extends Error {
  code: RealtimeErrorCode;
  provider?: string;
  details?: unknown;
}