/*
 * @tanstack/ai
 * Version:
 * Core TanStack AI library - Open source AI SDK
 * 295 lines (264 loc) • 7.76 kB
 * text/typescript
 */
// ============================================================================
// Session Configuration & Token Types
// ============================================================================
/**
 * Tuning options for voice activity detection (VAD).
 * Every field is optional.
 */
export interface VADConfig {
  /** Sensitivity threshold, expressed on a 0.0-1.0 scale */
  threshold?: number
  /** Milliseconds of audio to include ahead of detected speech */
  prefixPaddingMs?: number
  /** Milliseconds of silence that end the turn */
  silenceDurationMs?: number
}
/**
 * Plain, serializable description of a tool used when configuring a
 * realtime session. It carries just the metadata providers need; Zod
 * schemas and execute functions are not part of it.
 */
export interface RealtimeToolConfig {
  /** Tool name */
  name: string
  /** Tool description */
  description: string
  /** Optional input schema, held as a plain string-keyed object */
  inputSchema?: Record<string, any>
}
/**
 * Settings that describe a realtime session.
 * Every field is optional.
 */
export interface RealtimeSessionConfig {
  /** Model the session should use */
  model?: string
  /** Voice used for audio output */
  voice?: string
  /** System instructions given to the assistant */
  instructions?: string
  /** Tools made available within the session */
  tools?: Array<RealtimeToolConfig>
  /** Which VAD mode to use */
  vadMode?: 'server' | 'semantic' | 'manual'
  /** VAD tuning options */
  vadConfig?: VADConfig
  /** Response output modalities, e.g. ['audio', 'text'] or ['text'] */
  outputModalities?: Array<'audio' | 'text'>
  /** Generation temperature; the valid range is provider-specific (e.g. 0.6-1.2 for OpenAI) */
  temperature?: number
  /** Upper bound on the number of tokens in a response */
  maxOutputTokens?: number | 'inf'
  /** Semantic VAD eagerness level: 'low', 'medium' or 'high' */
  semanticEagerness?: 'low' | 'medium' | 'high'
  /** Options specific to the provider */
  providerOptions?: Record<string, any>
}
/**
 * Token the server returns so that a client can authenticate.
 */
export interface RealtimeToken {
  /** Identifier of the provider */
  provider: string
  /** Value of the ephemeral token */
  token: string
  /** Expiration time of the token, in milliseconds since the epoch */
  expiresAt: number
  /** Session configuration embedded in the token */
  config: RealtimeSessionConfig
}
/**
 * Contract for an adapter that produces provider-specific tokens.
 */
export interface RealtimeTokenAdapter {
  /** Identifier of the provider */
  provider: string
  /** Asynchronously produces an ephemeral token intended for client use */
  generateToken: () => Promise<RealtimeToken>
}
/**
 * Options accepted by the realtimeToken function.
 */
export interface RealtimeTokenOptions {
  /** Token adapter to use */
  adapter: RealtimeTokenAdapter
}
// ============================================================================
// Message Types
// ============================================================================
/**
 * A realtime message part that holds text.
 */
export interface RealtimeTextPart {
  /** Discriminant tag identifying a text part */
  type: 'text'
  /** The text itself */
  content: string
}
/**
 * A realtime message part that holds audio.
 */
export interface RealtimeAudioPart {
  /** Discriminant tag identifying an audio part */
  type: 'audio'
  /** Transcription of the audio */
  transcript: string
  /** Raw audio bytes; optional, present only if they were stored */
  audioData?: ArrayBuffer
  /** Length of the audio in milliseconds */
  durationMs?: number
}
/**
 * A realtime message part that records a tool call.
 */
export interface RealtimeToolCallPart {
  /** Discriminant tag identifying a tool-call part */
  type: 'tool-call'
  /** Identifier of the tool call */
  id: string
  /** Name of the tool */
  name: string
  /** Call arguments as a string (NOTE(review): presumably serialized JSON - confirm) */
  arguments: string
  /** Optional input value; its shape is not constrained here */
  input?: unknown
  /** Optional output value; its shape is not constrained here */
  output?: unknown
}
/**
 * A realtime message part that holds a tool result.
 */
export interface RealtimeToolResultPart {
  /** Discriminant tag identifying a tool-result part */
  type: 'tool-result'
  /** Identifier of the associated tool call */
  toolCallId: string
  /** Result content as a string */
  content: string
}
/**
 * A realtime message part that holds an image.
 */
export interface RealtimeImagePart {
  /** Discriminant tag identifying an image part */
  type: 'image'
  /** Either Base64-encoded image data or a URL */
  data: string
  /** Image MIME type, e.g. 'image/png' or 'image/jpeg' */
  mimeType: string
}
/**
 * Any part that may appear in a realtime message: text, audio,
 * tool call, tool result or image.
 */
export type RealtimeMessagePart =
  | RealtimeTextPart
  | RealtimeAudioPart
  | RealtimeToolCallPart
  | RealtimeToolResultPart
  | RealtimeImagePart
/**
 * One message within a realtime conversation.
 */
export interface RealtimeMessage {
  /** Identifier unique to this message */
  id: string
  /** Role of the message: user or assistant */
  role: 'user' | 'assistant'
  /** Creation timestamp of the message */
  timestamp: number
  /** The content parts that make up the message */
  parts: Array<RealtimeMessagePart>
  /** Set when the message was interrupted */
  interrupted?: boolean
  /** Reference to the audio buffer, if one was stored */
  audioId?: string
  /** Length of the audio in milliseconds */
  durationMs?: number
}
// ============================================================================
// Status Types
// ============================================================================
/**
 * The connection states a realtime client can report.
 */
export type RealtimeStatus =
  | 'idle'
  | 'connecting'
  | 'connected'
  | 'reconnecting'
  | 'error'
/**
 * The mode a realtime session is currently in.
 */
export type RealtimeMode =
  | 'idle'
  | 'listening'
  | 'thinking'
  | 'speaking'
// ============================================================================
// Audio Visualization Types
// ============================================================================
/**
 * Read-only access to the data needed to visualize session audio.
 * The two raw-sample subscription hooks are optional.
 */
export interface AudioVisualization {
  /** Volume level of the input, normalized to 0-1 */
  readonly inputLevel: number
  /** Volume level of the output, normalized to 0-1 */
  readonly outputLevel: number
  /** Returns frequency data for visualizing the input audio */
  getInputFrequencyData: () => Uint8Array
  /** Returns frequency data for visualizing the output audio */
  getOutputFrequencyData: () => Uint8Array
  /** Returns time domain data for drawing the input waveform */
  getInputTimeDomainData: () => Uint8Array
  /** Returns time domain data for drawing the output waveform */
  getOutputTimeDomainData: () => Uint8Array
  /** Sample rate of the input */
  readonly inputSampleRate: number
  /** Sample rate of the output */
  readonly outputSampleRate: number
  /**
   * Subscribes a callback to raw input audio samples.
   * The value returned is a no-argument function
   * (NOTE(review): presumably the unsubscribe handle - confirm).
   */
  onInputAudio?: (
    callback: (samples: Float32Array, sampleRate: number) => void,
  ) => () => void
  /**
   * Subscribes a callback to raw output audio samples.
   * The value returned is a no-argument function
   * (NOTE(review): presumably the unsubscribe handle - confirm).
   */
  onOutputAudio?: (
    callback: (samples: Float32Array, sampleRate: number) => void,
  ) => () => void
}
// ============================================================================
// Event Types
// ============================================================================
/**
 * Names of the events that the realtime connection emits.
 */
export type RealtimeEvent =
  | 'status_change'
  | 'mode_change'
  | 'transcript'
  | 'audio_chunk'
  | 'tool_call'
  | 'message_complete'
  | 'interrupted'
  | 'error'
/**
 * Maps each realtime event name to the shape of the payload it carries.
 */
export interface RealtimeEventPayloads {
  /** Carries the connection status */
  status_change: {
    status: RealtimeStatus
  }
  /** Carries the session mode */
  mode_change: {
    mode: RealtimeMode
  }
  /** Carries a transcript, the role it belongs to, and a finality flag */
  transcript: {
    role: 'user' | 'assistant'
    transcript: string
    isFinal: boolean
  }
  /** Carries a chunk of audio data together with its sample rate */
  audio_chunk: {
    data: ArrayBuffer
    sampleRate: number
  }
  /** Carries the id, tool name and input of a tool call */
  tool_call: {
    toolCallId: string
    toolName: string
    input: unknown
  }
  /** Carries the completed message */
  message_complete: {
    message: RealtimeMessage
  }
  /** Optionally carries a message id */
  interrupted: {
    messageId?: string
  }
  /** Carries the error object */
  error: {
    error: Error
  }
}
/**
 * Signature of a realtime event handler: a function that takes the
 * payload matching the event `TEvent` and returns nothing.
 */
export type RealtimeEventHandler<TEvent extends RealtimeEvent> = (
  payload: RealtimeEventPayloads[TEvent],
) => void
// ============================================================================
// Error Types
// ============================================================================
/**
 * The codes a realtime error can carry.
 */
export type RealtimeErrorCode =
  | 'TOKEN_EXPIRED'
  | 'CONNECTION_FAILED'
  | 'PERMISSION_DENIED'
  | 'PROVIDER_ERROR'
  | 'UNKNOWN'
/**
 * An `Error` extended with realtime-specific fields.
 */
export interface RealtimeError extends Error {
  /** Realtime error code */
  code: RealtimeErrorCode
  /** Optional provider identifier */
  provider?: string
  /** Optional extra details; their shape is not constrained here */
  details?: unknown
}