UNPKG

@voxket-ai/voxket-live

Version:

A React widget for embedding Voxket-powered audio/video/chat experiences.

246 lines (245 loc) 8.75 kB
import { LiveAvatarSession, SessionState, SessionDisconnectReason, ConnectionQuality } from '@heygen/liveavatar-web-sdk';
import { VoxketEventEmitter } from './event-emitter';
import { VoxketEvents } from '../types/core';

/**
 * Options for minting a LiveAvatar session token straight from the client.
 *
 * Supply it through `VoxketWidgetProps.liveAvatarConfig` or
 * `VoxketClientConfig.liveAvatarConfig`.
 */
export interface LiveAvatarClientConfig {
    /**
     * LiveAvatar API key. Falls back to Voxket's built-in key when omitted.
     *
     * NOTE(review): a key passed from browser code is visible to end users —
     * confirm this exposure is intended.
     */
    apiKey?: string;
    /** Avatar to use. Falls back to Voxket's default avatar when omitted. */
    avatarId?: string;
    /** Voice to use. Falls back to Voxket's default voice when omitted. */
    voiceId?: string;
    /** Context ID; it governs how the avatar thinks and responds. */
    contextId?: string;
    /** Language code such as `'en'`. Falls back to `'en'` when omitted. */
    language?: string;
    /** Turns on sandbox mode, which conserves credits during development. */
    isSandbox?: boolean;
    /** Override for the LiveAvatar API URL. Falls back to https://api.liveavatar.com. */
    apiUrl?: string;
    /** How the user interacts: `'CONVERSATIONAL'` (the default) or `'PUSH_TO_TALK'`. */
    interactivityType?: 'CONVERSATIONAL' | 'PUSH_TO_TALK';
    /** Whether voice chat (the microphone) is enabled. Falls back to `true`. */
    voiceChat?: boolean;
    /**
     * Requested video quality:
     * `'very_high'` (1080p), `'high'` (720p), `'medium'` (480p) or `'low'` (360p).
     */
    videoQuality?: 'very_high' | 'high' | 'medium' | 'low';
    /** Video codec: `'VP8'` or `'H264'`. */
    videoEncoding?: 'VP8' | 'H264';
}

/**
 * Connection details for the Voxket audio tap gateway.
 *
 * When this is supplied, the user's microphone audio is additionally published
 * to a Voxket LiveKit room so backend STT workers can transcribe and log the
 * conversation. LiveAvatar FULL mode keeps handling the avatar, LLM, TTS and
 * ASR in its own LiveKit room.
 */
export interface VoxketAudioTapConfig {
    /** URL of the Voxket LiveKit server (`wss://...`). */
    serverUrl: string;
    /** Token that lets the user join the Voxket room as a participant. */
    participantToken: string;
    /** Voxket session ID associated with this session. */
    voxketSessionId: string;
}

/**
 * Internal options for starting a session when a token has already been obtained.
 */
export interface LiveAvatarTokenConfig {
    /** Session token returned by `POST /v1/sessions/token`. */
    sessionToken: string;
    /** Optional override for the LiveAvatar API URL. */
    apiUrl?: string;
    /** Whether voice chat (the microphone) is enabled. Falls back to `true`. */
    voiceChat?: boolean;
}

/**
 * Map of LiveAvatar event names to listener signatures, as surfaced through
 * the Voxket event system.
 *
 * NOTE(review): several payloads are typed `any`; their actual shape is not
 * visible from this declaration file.
 */
export interface LiveAvatarEvents {
    'liveavatar.session.state_changed': (state: SessionState) => void;
    'liveavatar.session.stream_ready': () => void;
    'liveavatar.session.disconnected': (reason: SessionDisconnectReason) => void;
    'liveavatar.session.connection_quality_changed': (quality: ConnectionQuality) => void;
    'liveavatar.user.speak_started': (data: any) => void;
    'liveavatar.user.speak_ended': (data: any) => void;
    'liveavatar.user.transcription': (data: {
        text: string;
    }) => void;
    'liveavatar.avatar.speak_started': (data: any) => void;
    'liveavatar.avatar.speak_ended': (data: any) => void;
    'liveavatar.avatar.transcription': (data: {
        text: string;
    }) => void;
}

/**
 * LiveAvatarManager
 *
 * Owns the lifecycle of a LiveAvatar FULL mode session and, optionally, a
 * Voxket audio tap gateway used for transcription and analytics.
 *
 * Architecture (Audio Tap / SFU Gateway):
 *
 *   User Browser Mic
 *     │
 *     ├──► LiveAvatar FULL Mode (LiveAvatar's LiveKit room)
 *     │      → Handles avatar video, LLM, TTS, ASR
 *     │
 *     └──► Voxket LiveKit Room (audio tap)
 *            → STT worker subscribes to audio
 *            → Transcriptions stored on Voxket platform
 *            → Analytics, logging, monitoring
 *
 * The microphone audio is described as being captured once and published to
 * BOTH rooms at the same time over WebRTC, with no extra encoding.
 *
 * Flow:
 *   1. The client creates a Voxket backend session and receives a room URL + token.
 *   2. The client creates a LiveAvatar FULL mode session; the avatar starts.
 *   3. The mic audio track is cloned and published to Voxket's LiveKit room.
 *   4. Voxket backend STT workers subscribe to that audio track.
 *   5. LiveAvatar transcription events are forwarded to Voxket as well.
 *
 * NOTE(review): the description above says both "captured once" and "cloned";
 * only declarations are visible here, so confirm the exact mechanism against
 * the implementation.
 */
export declare class LiveAvatarManager {
    private session;
    private eventEmitter;
    private keepAliveInterval;
    private _isStreamReady;
    private _sessionState;
    private _isAvatarTalking;
    private _isUserTalking;
    private _sessionId;
    private _sessionToken;
    private _clientConfig;
    /**
     * Messages recently sent through sendMessage(), tracked so that
     * chat.message.sent is not emitted twice for the same message.
     */
    private _recentSentMessages;
    private _audioTapRoom;
    private _audioTapTrack;
    private _audioTapConfig;
    private _audioTapConnected;

    /**
     * @param eventEmitter Voxket event emitter handed to the manager.
     */
    constructor(eventEmitter: VoxketEventEmitter<VoxketEvents>);

    /**
     * Obtains a session token by calling the LiveAvatar API directly and then
     * starts the LiveAvatar FULL mode session.
     *
     * @param config Client-side options used to create the token and session.
     */
    createAndStart(config: LiveAvatarClientConfig): Promise<void>;

    /**
     * Starts a LiveAvatar FULL mode session using a token obtained beforehand.
     *
     * @param config Token and related start options.
     */
    startWithToken(config: LiveAvatarTokenConfig): Promise<void>;

    /** Stops the LiveAvatar session. */
    stop(): Promise<void>;

    /**
     * Binds the avatar's video/audio stream to an HTML media element.
     *
     * @param element Media element that should render the stream.
     */
    attach(element: HTMLMediaElement): void;

    /**
     * Sends a text message to the avatar; the avatar answers via the LLM.
     *
     * @param message Text to send.
     */
    sendMessage(message: string): void;

    /**
     * Has the avatar speak the given text verbatim (TTS only, no LLM).
     *
     * @param text Text for the avatar to repeat.
     */
    repeat(text: string): void;

    /** Interrupts whatever the avatar is currently saying. */
    interrupt(): void;

    /** Begins listening; intended for push-to-talk mode. */
    startListening(): void;

    /** Ends listening; intended for push-to-talk mode. */
    stopListening(): void;

    /** Mutes the user's microphone within the LiveAvatar session. */
    muteMicrophone(): Promise<void>;

    /** Unmutes the user's microphone within the LiveAvatar session. */
    unmuteMicrophone(): Promise<void>;

    /** Reports whether the microphone is muted in the LiveAvatar session. */
    get isMicMuted(): boolean;

    /** Keeps the session alive. */
    keepAlive(): Promise<void>;

    /**
     * Exposes the underlying LiveAvatarSession.
     *
     * NOTE(review): the return type allows `null`; presumably that means no
     * session exists — confirm against the implementation.
     */
    getSession(): LiveAvatarSession | null;

    /** Reports whether the avatar video stream is ready to be displayed. */
    get isStreamReady(): boolean;

    /** The current session state. */
    get sessionState(): SessionState;

    /** Reports whether the avatar is talking right now. */
    get isAvatarTalking(): boolean;

    /** Reports whether the user is talking right now. */
    get isUserTalking(): boolean;

    /** Reports whether a session is active. */
    get isActive(): boolean;

    /**
     * Maximum session duration as reported by the server.
     *
     * NOTE(review): the unit is not stated in this declaration — confirm
     * before doing arithmetic on it.
     */
    get maxSessionDuration(): number | null;

    /** The LiveAvatar session ID (originating from token creation). */
    get sessionId(): string | null;

    /** Reports whether the audio tap (Voxket LiveKit room) is connected. */
    get isAudioTapConnected(): boolean;

    /**
     * Connects the audio tap gateway to Voxket's LiveKit room.
     *
     * Doing so duplicates the user's mic audio into Voxket's room, letting
     * backend STT workers transcribe and log conversations.
     *
     * Call this AFTER the LiveAvatar session has started, so the microphone
     * is already active.
     *
     * Architecture:
     *   Browser Mic → LiveAvatar FULL Mode (avatar, LLM, TTS)
     *               → Voxket LiveKit Room (STT workers, analytics)
     *
     * The audio track is described as being captured once via getUserMedia
     * and published to both rooms over WebRTC — no re-encoding, ~30ms
     * additional latency.
     *
     * @param config Server URL, participant token and Voxket session ID for the tap.
     */
    connectAudioTap(config: VoxketAudioTapConfig): Promise<void>;

    /**
     * Disconnects the audio tap from Voxket's LiveKit room.
     * The LiveAvatar session itself is left untouched.
     */
    disconnectAudioTap(): Promise<void>;

    /**
     * Mutes or unmutes the audio tap track so it mirrors the LiveAvatar mic
     * state. Invoked internally whenever the user toggles the microphone.
     */
    private syncAudioTapMuteState;

    /**
     * Registers event listeners on the Voxket audio tap room.
     * These listen for transcription data streams coming from STT workers.
     */
    private setupAudioTapEventListeners;
    private setupEventListeners;
    private startKeepAlive;
    private stopKeepAlive;
    private cleanup;
}