@voxket-ai/voxket-live
Version:
A React widget for embedding Voxket-powered audio/video/chat experiences.
246 lines (245 loc) • 8.75 kB
TypeScript
import { LiveAvatarSession, SessionState, SessionDisconnectReason, ConnectionQuality } from '@heygen/liveavatar-web-sdk';
import { VoxketEventEmitter } from './event-emitter';
import { VoxketEvents } from '../types/core';
/**
 * Options for creating a LiveAvatar session token directly from the client.
 *
 * Supply this object through `VoxketWidgetProps.liveAvatarConfig` or
 * `VoxketClientConfig.liveAvatarConfig`. Every field is optional.
 */
export interface LiveAvatarClientConfig {
  /**
   * LiveAvatar API key. Falls back to Voxket's built-in key when omitted.
   *
   * NOTE(review): the token is requested from the client, so a key passed
   * here is presumably visible to end users — confirm this is acceptable.
   */
  apiKey?: string;

  /** Identifier of the avatar to use. Falls back to Voxket's default avatar. */
  avatarId?: string;

  /** Identifier of the voice to use. Falls back to Voxket's default voice. */
  voiceId?: string;

  /** Identifier of the context that determines how the avatar thinks and responds. */
  contextId?: string;

  /** Language code such as `'en'`. Falls back to `'en'`. */
  language?: string;

  /** Turns on sandbox mode, which conserves credits during development. */
  isSandbox?: boolean;

  /** Alternative LiveAvatar API base URL. Falls back to `https://api.liveavatar.com`. */
  apiUrl?: string;

  /** How the user interacts with the avatar: `'CONVERSATIONAL'` (the default) or `'PUSH_TO_TALK'`. */
  interactivityType?: 'PUSH_TO_TALK' | 'CONVERSATIONAL';

  /** Whether voice chat (the microphone) is enabled. Falls back to `true`. */
  voiceChat?: boolean;

  /**
   * Requested video quality:
   * `'low'` (360p), `'medium'` (480p), `'high'` (720p) or `'very_high'` (1080p).
   */
  videoQuality?: 'low' | 'medium' | 'high' | 'very_high';

  /** Requested video codec: `'H264'` or `'VP8'`. */
  videoEncoding?: 'H264' | 'VP8';
}
/**
 * Connection details for the Voxket audio tap gateway.
 *
 * When this config is supplied, the user's microphone audio is additionally
 * published to a Voxket LiveKit room so backend STT workers can transcribe
 * and log the conversation. LiveAvatar FULL mode keeps handling the avatar,
 * LLM, TTS and ASR in its own LiveKit room.
 */
export interface VoxketAudioTapConfig {
  /** URL of the Voxket LiveKit server (`wss://...`). */
  serverUrl: string;

  /** Token the user presents as a participant when joining the Voxket room. */
  participantToken: string;

  /** Voxket session ID that this audio tap belongs to. */
  voxketSessionId: string;
}
/**
 * Internal options for starting a session when a token has already been obtained.
 */
export interface LiveAvatarTokenConfig {
  /** Session token returned by `POST /v1/sessions/token`. */
  sessionToken: string;

  /** Alternative LiveAvatar API URL, if the default should not be used. */
  apiUrl?: string;

  /** Whether voice chat (the microphone) is enabled. Falls back to `true`. */
  voiceChat?: boolean;
}
/**
 * Map of LiveAvatar event names to the listener signature each one expects.
 * These events are emitted through the Voxket event system.
 *
 * Event names fall into three groups by prefix:
 * `liveavatar.session.*`, `liveavatar.user.*` and `liveavatar.avatar.*`.
 */
export interface LiveAvatarEvents {
  // --- Session lifecycle -------------------------------------------------
  /** The session moved to a new state; receives that state. */
  'liveavatar.session.state_changed': (state: SessionState) => void;
  /** The session stream became ready; carries no payload. */
  'liveavatar.session.stream_ready': () => void;
  /** The session disconnected; receives the disconnect reason. */
  'liveavatar.session.disconnected': (reason: SessionDisconnectReason) => void;
  /** Connection quality changed; receives the new quality value. */
  'liveavatar.session.connection_quality_changed': (quality: ConnectionQuality) => void;

  // --- User speech -------------------------------------------------------
  /** The user started speaking. Payload shape is untyped here. */
  'liveavatar.user.speak_started': (data: any) => void;
  /** The user stopped speaking. Payload shape is untyped here. */
  'liveavatar.user.speak_ended': (data: any) => void;
  /** A transcription of the user's speech; `text` holds the transcribed string. */
  'liveavatar.user.transcription': (data: { text: string }) => void;

  // --- Avatar speech -----------------------------------------------------
  /** The avatar started speaking. Payload shape is untyped here. */
  'liveavatar.avatar.speak_started': (data: any) => void;
  /** The avatar stopped speaking. Payload shape is untyped here. */
  'liveavatar.avatar.speak_ended': (data: any) => void;
  /** A transcription of the avatar's speech; `text` holds the transcribed string. */
  'liveavatar.avatar.transcription': (data: { text: string }) => void;
}
/**
* LiveAvatarManager
*
* Manages the lifecycle of a LiveAvatar FULL mode session, with an optional
* Voxket audio tap gateway used for transcription and analytics.
*
* This is a declaration only: the notes below restate the design described by
* the original authors. The implementation is not visible here, so treat the
* behavioral details as intended design rather than verified fact.
*
* Architecture (Audio Tap / SFU Gateway):
*
* User Browser Mic
* │
* ├──► LiveAvatar FULL Mode (LiveAvatar's LiveKit room)
* │ → Handles avatar video, LLM, TTS, ASR
* │
* └──► Voxket LiveKit Room (audio tap)
* → STT worker subscribes to audio
* → Transcriptions stored on Voxket platform
* → Analytics, logging, monitoring
*
* The mic audio is described as being captured once and published to BOTH
* rooms simultaneously over WebRTC, without extra encoding.
* NOTE(review): step 3 below says the mic track is "cloned", which is not
* quite the same as "captured once" — confirm which one the implementation does.
*
* Flow:
* 1. Client creates a Voxket backend session → gets room URL + token
* 2. Client creates LiveAvatar FULL mode session → avatar starts
* 3. Mic audio track is cloned and published to Voxket's LiveKit room
* 4. Voxket backend STT workers subscribe to the audio track
* 5. LiveAvatar transcription events are also forwarded to Voxket
*/
export declare class LiveAvatarManager {
// Private state. Types are erased in declaration output, so only the names are
// visible here; the groupings below are inferred from those names.
// Session and event plumbing:
private session;
private eventEmitter;
private keepAliveInterval;
// Cached status flags, presumably backing the public getters of the same name:
private _isStreamReady;
private _sessionState;
private _isAvatarTalking;
private _isUserTalking;
private _sessionId;
private _sessionToken;
private _clientConfig;
/** Track messages sent via sendMessage() to avoid duplicate chat.message.sent emissions */
private _recentSentMessages;
// Audio tap (Voxket LiveKit room) state:
private _audioTapRoom;
private _audioTapTrack;
private _audioTapConfig;
private _audioTapConnected;
/**
* @param eventEmitter - Voxket event emitter typed over VoxketEvents.
* Presumably the channel LiveAvatar events are emitted on — confirm.
*/
constructor(eventEmitter: VoxketEventEmitter<VoxketEvents>);
/**
* Create a session token by calling the LiveAvatar API directly,
* then start the LiveAvatar FULL mode session.
*
* @param config - Client-side options (API key, avatar, voice, quality, ...).
* @returns A promise with no value. Rejection conditions are not visible in
* this declaration.
*/
createAndStart(config: LiveAvatarClientConfig): Promise<void>;
/**
* Start a LiveAvatar FULL mode session with an already-obtained token.
*
* @param config - The session token plus optional API URL and voice-chat flag.
* @returns A promise with no value.
*/
startWithToken(config: LiveAvatarTokenConfig): Promise<void>;
/**
* Stop the LiveAvatar session.
*
* @returns A promise with no value.
*/
stop(): Promise<void>;
/**
* Attach the avatar video/audio stream to an HTML media element.
*
* @param element - Target media element (HTMLMediaElement covers both
* video and audio elements).
*/
attach(element: HTMLMediaElement): void;
/**
* Send a text message to the avatar (it will respond via LLM).
*
* @param message - Text to send.
*/
sendMessage(message: string): void;
/**
* Make the avatar repeat a specific text (TTS only, no LLM).
*
* @param text - Text for the avatar to speak.
*/
repeat(text: string): void;
/**
* Interrupt the avatar's current speech.
*/
interrupt(): void;
/**
* Start listening (for push-to-talk mode).
*/
startListening(): void;
/**
* Stop listening (for push-to-talk mode).
*/
stopListening(): void;
/**
* Mute the user's microphone in the LiveAvatar session.
*
* @returns A promise with no value.
*/
muteMicrophone(): Promise<void>;
/**
* Unmute the user's microphone in the LiveAvatar session.
*
* @returns A promise with no value.
*/
unmuteMicrophone(): Promise<void>;
/**
* Whether the mic is muted in the LiveAvatar session.
*/
get isMicMuted(): boolean;
/**
* Keep the session alive.
* NOTE(review): the private keepAliveInterval / startKeepAlive members suggest
* this is also invoked on a timer — confirm against the implementation.
*
* @returns A promise with no value.
*/
keepAlive(): Promise<void>;
/**
* Get the underlying LiveAvatarSession.
*
* @returns The SDK session object, or null (presumably when no session has
* been started or it has been cleaned up — confirm).
*/
getSession(): LiveAvatarSession | null;
/**
* Whether the avatar video stream is ready to display.
*/
get isStreamReady(): boolean;
/**
* Current session state, as an SDK SessionState value.
*/
get sessionState(): SessionState;
/**
* Whether the avatar is currently talking.
*/
get isAvatarTalking(): boolean;
/**
* Whether the user is currently talking.
*/
get isUserTalking(): boolean;
/**
* Whether a session is active.
*/
get isActive(): boolean;
/**
* Max session duration from the server, or null when unavailable.
* NOTE(review): the unit (seconds vs. milliseconds) is not stated here — confirm.
*/
get maxSessionDuration(): number | null;
/**
* LiveAvatar session ID (from token creation), or null when unavailable.
*/
get sessionId(): string | null;
/**
* Whether the audio tap (Voxket LiveKit room) is connected.
*/
get isAudioTapConnected(): boolean;
/**
* Connect the audio tap gateway to Voxket's LiveKit room.
* This duplicates the user's mic audio into Voxket's room so backend
* STT workers can transcribe and log conversations.
*
* Should be called AFTER LiveAvatar session is started (so mic is active).
*
* Architecture:
* Browser Mic → LiveAvatar FULL Mode (avatar, LLM, TTS)
* → Voxket LiveKit Room (STT workers, analytics)
*
* The audio track is described as being captured once via getUserMedia and
* published to both rooms via WebRTC without re-encoding. The original notes
* quote ~30ms of additional latency; that figure cannot be verified from this
* declaration.
*
* @param config - Voxket LiveKit server URL, participant token and session ID.
* @returns A promise with no value.
*/
connectAudioTap(config: VoxketAudioTapConfig): Promise<void>;
/**
* Disconnect the audio tap from Voxket's LiveKit room.
* Does not affect the LiveAvatar session.
*
* @returns A promise with no value.
*/
disconnectAudioTap(): Promise<void>;
/**
* Mute/unmute the audio tap track to match LiveAvatar mic state.
* Called internally when the user toggles their mic.
*/
private syncAudioTapMuteState;
/**
* Setup event listeners for the Voxket audio tap room.
* Listens for transcription data streams from STT workers.
*/
private setupAudioTapEventListeners;
// Remaining private helpers. Their signatures are erased in declaration output;
// the names suggest LiveAvatar event wiring, keep-alive timer control and teardown.
private setupEventListeners;
private startKeepAlive;
private stopKeepAlive;
private cleanup;
}