// @hamsa-ai/voice-agents-sdk
// Hamsa AI - Voice Agents JavaScript SDK
// Version: (not specified)
// TypeScript declaration listing: 538 lines (537 loc), 18.9 kB
/**
* LiveKitManager - Main orchestrator for voice agent communication
*
* This class serves as the primary interface for managing real-time voice communication
* with AI agents using LiveKit WebRTC infrastructure. It coordinates four specialized
* modules to provide a comprehensive voice agent SDK:
*
* - Connection Management: Handles room connections, participants, and network state
* - Analytics Engine: Processes WebRTC statistics and performance metrics
* - Audio Management: Manages audio tracks, volume control, and quality monitoring
* - Tool Registry: Handles RPC method registration and client-side tool execution
*
* Key features:
* - Real-time audio streaming with automatic quality adjustment
* - Comprehensive analytics and monitoring capabilities
* - Client-side tool integration for extended agent functionality
* - Automatic reconnection and error recovery
* - Event-driven architecture for reactive applications
*
* @example
* ```typescript
* const manager = new LiveKitManager(
* 'wss://livekit.example.com',
* 'access_token',
* [customTool1, customTool2]
* );
*
* manager.on('connected', () => console.log('Connected to voice agent'));
* manager.on('answerReceived', (text) => console.log('Agent said:', text));
*
* await manager.connect();
* ```
*/
import { EventEmitter } from 'events';
import type { Room } from 'livekit-client';
import { type DebugLogger } from '../utils';
import { LiveKitAnalytics } from './livekit-analytics';
import { LiveKitAudioManager } from './livekit-audio-manager';
import { LiveKitConnection } from './livekit-connection';
import { LiveKitToolRegistry } from './livekit-tool-registry';
import type { AudioLevelsResult, CallAnalyticsResult, ConnectionStatsResult, ParticipantData, PerformanceMetricsResult, Tool, TrackStatsResult } from './types';
export type { AgentState, AudioLevelsResult, CallAnalyticsResult, ConnectionStatsResult, ParticipantData, PerformanceMetricsResult, TrackStatsData, TrackStatsResult, } from './types';
/**
* Main LiveKitManager class that orchestrates voice agent communication
*
* This class extends EventEmitter to provide a reactive interface for handling
* voice agent interactions, real-time analytics, and WebRTC connection management.
*
* The four sub-modules (`connection`, `analytics`, `audioManager`, `toolRegistry`)
* are exposed as public properties; most getters and `get*()` methods below are
* documented as views over the state held by those modules.
*
* NOTE(review): this is a declaration-only view of the class. Behavioral statements
* in these comments describe the intended contract and cannot be verified from the
* typings alone.
*/
export default class LiveKitManager extends EventEmitter {
/**
* Declaration-emit marker: the implementation has ECMAScript private (`#`) members.
* Their names and types are intentionally not part of the public typings.
*/
#private;
/** Connection module - manages LiveKit room connections and participants */
connection: LiveKitConnection;
/** Analytics module - processes WebRTC stats and performance metrics */
analytics: LiveKitAnalytics;
/** Audio module - manages audio tracks, volume, and quality */
audioManager: LiveKitAudioManager;
/** Tool registry - handles client-side tool registration and RPC calls */
toolRegistry: LiveKitToolRegistry;
/** LiveKit WebSocket URL for room connection */
lkUrl: string;
/** JWT access token for authentication */
accessToken: string;
/** Debug logger instance for conditional logging */
logger: DebugLogger;
/**
* CSS selector for the element where avatar video will be rendered.
* `undefined` when no selector was supplied.
*/
avatarContainerSelector: string | undefined;
/** Video elements created for avatar tracks, tracked for cleanup */
videoElements: Set<HTMLVideoElement>;
/**
* Creates a new LiveKitManager instance
*
* @param lkUrl - LiveKit WebSocket URL (e.g., 'wss://your-livekit.example.com')
* @param accessToken - JWT token for room access authentication
* @param tools - Optional array of client-side tools that agents can call during conversations
* @param options - Optional settings object (fourth argument); both fields are optional
* @param options.debug - Debug flag. Presumably enables output from `logger` —
*   TODO confirm against the implementation; the default is not visible here
* @param options.avatarContainerSelector - CSS selector for the element where avatar
*   video will be rendered (matches the `avatarContainerSelector` property)
*
* @example
* ```typescript
* const customTool = {
*   function_name: "getUserData",
*   description: "Retrieves user information",
*   parameters: [{ name: "userId", type: "string", description: "User ID" }],
*   required: ["userId"],
*   fn: async (userId: string) => ({ name: "John", email: "john@example.com" })
* };
*
* const manager = new LiveKitManager(
*   'wss://livekit.example.com',
*   'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...',
*   [customTool],
*   { debug: true, avatarContainerSelector: '#avatar' }
* );
* ```
*/
constructor(lkUrl: string, accessToken: string, tools?: Tool[], { debug, avatarContainerSelector, }?: {
debug?: boolean;
avatarContainerSelector?: string;
});
/**
* Establishes connection to the LiveKit room and initializes voice agent communication
*
* This method performs the following operations:
* - Validates connection state to prevent duplicate connections
* - Updates analytics tracking for connection attempts
* - Establishes WebRTC connection to the LiveKit room
* - Triggers module initialization once connected
*
* @returns Promise that resolves once the connection attempt has completed
* @throws {Error} Connection errors from LiveKit (network issues, authentication failures, etc.)
*
* @example
* ```typescript
* try {
*   await manager.connect();
*   console.log('Successfully connected to voice agent');
* } catch (error) {
*   // `error` is `unknown` under strict compiler settings, so narrow before use
*   const message = error instanceof Error ? error.message : String(error);
*   console.error('Failed to connect:', message);
* }
* ```
*/
connect(): Promise<void>;
/**
* Terminates the connection to the LiveKit room and performs cleanup
*
* This method safely disconnects from the voice agent and ensures all resources
* are properly released, including audio tracks, analytics timers, and event listeners.
*
* @returns Promise that resolves once disconnection has completed
*
* @example
* ```typescript
* await manager.disconnect();
* console.log('Disconnected from voice agent');
* ```
*/
disconnect(): Promise<void>;
/**
* Pauses the voice conversation, stopping audio transmission and reception
*
* This temporarily halts communication with the voice agent while maintaining
* the underlying connection. Audio playback is paused and microphone input
* is muted until resume() is called.
*
* @example
* ```typescript
* manager.pause();
* console.log('Conversation paused');
*
* // Resume later
* setTimeout(() => manager.resume(), 5000);
* ```
*/
pause(): void;
/**
* Resumes a paused voice conversation
*
* Restores audio transmission and reception, allowing continued communication
* with the voice agent. This reverses the effects of pause().
*
* @example
* ```typescript
* manager.resume();
* console.log('Conversation resumed');
* ```
*/
resume(): void;
/**
* Adjusts the volume level for audio playback from the voice agent
*
* @param volume - Volume level between 0.0 (muted) and 1.0 (full volume).
*   NOTE(review): handling of out-of-range values is not visible in this declaration.
*
* @example
* ```typescript
* // Set to half volume
* manager.setVolume(0.5);
*
* // Mute completely
* manager.setVolume(0);
*
* // Full volume
* manager.setVolume(1.0);
* ```
*/
setVolume(volume: number): void;
/**
* Gets the current LiveKit room instance
*
* @returns The LiveKit Room object if connected, null otherwise
*
* @example
* ```typescript
* const room = manager.room;
* if (room) {
*   console.log('Connected to room:', room.name);
*   console.log('Participants:', room.remoteParticipants.size);
* }
* ```
*/
get room(): Room | null;
/**
* Checks if currently connected to the voice agent
*
* @returns True if connected to LiveKit room, false otherwise
*
* @example
* ```typescript
* if (manager.isConnected) {
*   console.log('Ready for voice communication');
* } else {
*   console.log('Not connected - call connect() first');
* }
* ```
*/
get isConnected(): boolean;
/**
* Checks if the conversation is currently paused
*
* @returns True if paused, false if active or disconnected
*
* @example
* ```typescript
* if (manager.isPaused) {
*   console.log('Conversation is paused');
*   showResumeButton();
* }
* ```
*/
get isPaused(): boolean;
/**
* Gets the current audio volume level
*
* @returns Current volume between 0.0 (muted) and 1.0 (full volume)
*
* @example
* ```typescript
* const currentVolume = manager.volume;
* updateVolumeSlider(currentVolume);
* ```
*/
get volume(): number;
/**
* Gets the set of active HTML audio elements
*
* @returns Set of HTMLAudioElement instances currently playing agent audio
*
* @example
* ```typescript
* const audioElements = manager.audioElements;
* console.log(`Active audio elements: ${audioElements.size}`);
* ```
*/
get audioElements(): Set<HTMLAudioElement>;
/**
* Gets the array of registered client-side tools
*
* @returns Array of Tool objects available for agent execution
*
* @example
* ```typescript
* const registeredTools = manager.tools;
* console.log(`Available tools: ${registeredTools.map(t => t.function_name).join(', ')}`);
* ```
*/
get tools(): Tool[];
/**
* Gets raw call statistics from the analytics module
*
* @returns Internal call statistics object with WebRTC metrics
* @internal
*/
get callStats(): import("./types").CallStats;
/**
* Gets raw connection metrics from the analytics module
*
* @returns Internal connection metrics object
* @internal
*/
get connectionMetrics(): import("./types").ConnectionMetrics;
/**
* Gets raw audio metrics from the analytics module
*
* @returns Internal audio metrics object
* @internal
*/
get audioMetrics(): import("./types").AudioMetrics;
/**
* Gets raw performance metrics from the analytics module
*
* @returns Internal performance metrics object
* @internal
*/
get performanceMetrics(): import("./types").PerformanceMetrics;
/**
* Gets the analytics collection interval timer
*
* @returns Timer handle (typed as NodeJS.Timeout) for the analytics interval,
*   or null if not collecting
* @internal
*/
get analyticsInterval(): NodeJS.Timeout | null;
/**
* Gets the timestamp when the call started
*
* @returns Unix timestamp in milliseconds when call began, null if not started
*
* @example
* ```typescript
* const startTime = manager.callStartTime;
* if (startTime) {
*   const duration = Date.now() - startTime;
*   console.log(`Call duration: ${Math.floor(duration / 1000)}s`);
* }
* ```
*/
get callStartTime(): number | null;
/**
* Gets the map of active participants in the room
*
* @returns Map of participant SIDs to ParticipantData objects
* @internal Use getParticipants() for structured participant data
*/
get participants(): Map<string, ParticipantData>;
/**
* Gets the raw track statistics map
*
* @returns Map of track IDs to TrackStatsData objects
* @internal Use getTrackStats() for structured track statistics
*/
get trackStats(): Map<string, import("./types").TrackStatsData>;
/**
* Retrieves current network connection statistics and quality metrics
*
* @returns Object containing connection quality, connection counts, and timing data
*
* @example
* ```typescript
* const stats = manager.getConnectionStats();
* console.log(`Connection quality: ${stats.quality}`);
* console.log(`Connection attempts: ${stats.connectionAttempts}`);
* console.log(`Reconnections: ${stats.reconnectionAttempts}`);
*
* if (stats.quality === 'poor') {
*   showNetworkWarning();
* }
* ```
*/
getConnectionStats(): ConnectionStatsResult;
/**
* Retrieves current audio levels and quality metrics for both user and agent
*
* @returns The AudioLevelsResult fields (audio levels, speaking times, quality metrics)
*   extended with two extra fields: `isPaused` (current pause state) and `volume`
*   (current playback volume)
*
* @example
* ```typescript
* const audio = manager.getAudioLevels();
*
* // Update audio level indicators in UI
* updateMeterBar('user-audio', audio.userAudioLevel);
* updateMeterBar('agent-audio', audio.agentAudioLevel);
*
* // Show speaking time statistics
* console.log(`User spoke for ${audio.userSpeakingTime / 1000}s`);
* console.log(`Agent spoke for ${audio.agentSpeakingTime / 1000}s`);
* ```
*/
getAudioLevels(): AudioLevelsResult & {
isPaused: boolean;
volume: number;
};
/**
* Retrieves current performance metrics including response times and call duration
*
* @returns Object containing response times, call duration, and connection timing
*
* @example
* ```typescript
* const perf = manager.getPerformanceMetrics();
*
* // Monitor response time for agent interactions
* if (perf.responseTime > 3000) {
*   console.warn('High response time detected:', perf.responseTime + 'ms');
* }
*
* // Display call duration
* const minutes = Math.floor(perf.callDuration / 60000);
* const seconds = Math.floor((perf.callDuration % 60000) / 1000);
* console.log(`Call duration: ${minutes}:${seconds.toString().padStart(2, '0')}`);
* ```
*/
getPerformanceMetrics(): PerformanceMetricsResult;
/**
* Retrieves structured information about all participants in the room
*
* @returns Array of ParticipantData objects with identity, connection info, and metadata
*
* @example
* ```typescript
* const participants = manager.getParticipants();
*
* participants.forEach(participant => {
*   console.log(`Participant: ${participant.identity}`);
*   console.log(`Connected at: ${new Date(participant.connectionTime)}`);
*
*   if (participant.metadata) {
*     console.log(`Metadata: ${participant.metadata}`);
*   }
* });
*
* // Find the agent participant
* const agent = participants.find(p => p.identity.includes('agent'));
* ```
*/
getParticipants(): ParticipantData[];
/**
* Retrieves current audio track statistics and stream information
*
* @returns Object containing track counts, audio element info, and detailed track data
*
* @example
* ```typescript
* const trackStats = manager.getTrackStats();
*
* console.log(`Active tracks: ${trackStats.activeTracks}/${trackStats.totalTracks}`);
* console.log(`Audio elements: ${trackStats.audioElements}`);
*
* // Inspect individual tracks
* trackStats.trackDetails.forEach(([trackId, data]) => {
*   console.log(`Track ${trackId}: ${data.kind} from ${data.participant}`);
* });
* ```
*/
getTrackStats(): TrackStatsResult;
/**
* Retrieves comprehensive analytics combining all metrics into a single snapshot
*
* This is the primary method for accessing complete call analytics, combining
* connection statistics, audio metrics, performance data, participant info,
* track statistics, and call metadata into a unified result.
*
* @returns Complete analytics object with all available metrics and metadata
*
* @example
* ```typescript
* const analytics = manager.getCallAnalytics();
*
* // Log comprehensive call summary
* console.log('=== Call Analytics ===');
* console.log(`Duration: ${analytics.performanceMetrics.callDuration}ms`);
* console.log(`Quality: ${analytics.connectionStats.quality}`);
* console.log(`Participants: ${analytics.participants.length}`);
* console.log(`Tracks: ${analytics.trackStats.activeTracks}`);
*
* // Send to analytics service
* analyticsService.recordCall({
*   sessionId: generateSessionId(),
*   timestamp: Date.now(),
*   data: analytics
* });
*
* // Check for quality issues
* if (analytics.connectionStats.quality === 'poor') {
*   reportNetworkIssue(analytics.connectionStats);
* }
* ```
*/
getCallAnalytics(): CallAnalyticsResult;
/**
* Registers client-side tools that voice agents can call during conversations
*
* This method updates the available tools and registers them as RPC methods
* with the LiveKit room for remote execution by voice agents.
*
* @param tools - Optional array of Tool objects to register. If not provided,
*   uses tools from constructor or previously set tools.
*
* @example
* ```typescript
* const userDataTool = {
*   function_name: "getUserProfile",
*   description: "Retrieves user profile information",
*   parameters: [
*     { name: "userId", type: "string", description: "User ID to lookup" }
*   ],
*   required: ["userId"],
*   fn: async (userId: string) => {
*     const user = await userService.getProfile(userId);
*     return { name: user.name, email: user.email, plan: user.subscription };
*   }
* };
*
* const weatherTool = {
*   function_name: "getCurrentWeather",
*   description: "Gets current weather for a location",
*   parameters: [
*     { name: "location", type: "string", description: "City name" }
*   ],
*   required: ["location"],
*   fn: async (location: string) => {
*     return await weatherAPI.getCurrent(location);
*   }
* };
*
* // Register new tools after connection
* manager.registerTools([userDataTool, weatherTool]);
*
* // Agent can now call these tools during conversation
* manager.on('answerReceived', (text) => {
*   console.log('Agent response:', text);
*   // Agent might say: "I found your profile! You're on the premium plan."
* });
* ```
*/
registerTools(tools?: Tool[]): void;
/**
* Performs comprehensive cleanup of all modules and resources
*
* This method ensures all resources are properly released, including:
* - WebRTC connections and media streams
* - Audio elements and playback resources
* - Analytics timers and event listeners
* - Tool registry and RPC handlers
*
* Called automatically on disconnect, but can be called manually for
* explicit resource management in complex applications.
*
* NOTE(review): the second example calls cleanup() right after disconnect(),
* which assumes repeated cleanup is harmless — confirm against the implementation.
*
* @example
* ```typescript
* // Explicit cleanup when component unmounts
* useEffect(() => {
*   return () => {
*     manager.cleanup();
*   };
* }, []);
*
* // Cleanup before reconnecting with different configuration
* await manager.disconnect();
* manager.cleanup();
*
* const newManager = new LiveKitManager(newUrl, newToken, newTools);
* ```
*/
cleanup(): void;
}