@hamsa-ai/voice-agents-sdk

import { EventEmitter } from 'events';
import type {
  ConnectionState,
  LocalTrack,
  LocalTrackPublication,
  Participant,
  RemoteParticipant,
  RemoteTrack,
  Room,
} from 'livekit-client';
import LiveKitManager, {
  type AgentState,
  type AudioLevelsResult,
  type CallAnalyticsResult,
  type ConnectionStatsResult,
  type ParticipantData,
  type PerformanceMetricsResult,
  type TrackStatsResult,
} from './classes/livekit-manager';
import ScreenWakeLock from './classes/screen-wake-lock';
import type {
  AudioCaptureCallback,
  AudioCaptureOptions,
  ConnectionQualityData,
  DTMFDigit,
  TrackSubscriptionData,
  TrackUnsubscriptionData,
} from './classes/types';

export type { RpcInvocationData } from 'livekit-client';
export { RpcError } from 'livekit-client';
export type { AgentState } from './classes/livekit-manager';
export type {
  AudioCaptureCallback,
  AudioCaptureFormat,
  AudioCaptureMetadata,
  AudioCaptureOptions,
  AudioCaptureSource,
  DTMFDigit,
} from './classes/types';

/**
 * Error type carrying a human-readable `message` together with an optional
 * machine-readable `messageKey`, so callers can localize the error or branch
 * on it programmatically.
 */
declare class HamsaApiError extends Error {
  /** Machine-readable key identifying the error (for i18n or programmatic handling). */
  readonly messageKey?: string;
  constructor(message: string, messageKey?: string);
}

/**
 * Deployment regions supported by the Hamsa platform.
 * The region selects the default API and LiveKit URLs used for the connection.
 */
export type Region = 'eu' | 'uae';

/**
 * Options accepted by the HamsaVoiceAgent constructor.
 * Used to customize API endpoints and other global settings.
 */
type HamsaVoiceAgentConfig = {
  /**
   * Deployment region for the Hamsa platform; selects the default API and
   * LiveKit URLs. Defaults to 'eu'. Ignored when API_URL or LIVEKIT_URL are
   * explicitly provided.
   */
  region?: Region;

  /** Base URL of the Hamsa API. When provided, overrides the region default. */
  API_URL?: string;

  /** LiveKit RTC WebSocket URL. When provided, overrides the region default. */
  LIVEKIT_URL?: string;

  /** Turns on debug logging for troubleshooting. Defaults to false. */
  debug?: boolean;
};

/**
 * Per-platform connection delays, in milliseconds.
 * Supplied through `StartOptions.connectionDelay` to prevent audio cutoff.
 */
type ConnectionDelays = {
  /** Delay in milliseconds applied on Android devices. */
  android?: number;

  /** Delay in milliseconds applied on iOS devices. */
  ios?: number;

  /** Delay in milliseconds applied on all other devices. */
  default?: number;
};

/**
 * Describes one parameter of a client-side tool, i.e. an input the tool's
 * function expects to receive from the agent.
 */
type ToolParameter = {
  /** Parameter name. */
  name: string;

  /** Data type of the parameter (e.g., 'string', 'number', 'boolean'). */
  type: string;

  /** Explanation of what the parameter represents. */
  description: string;
};

/**
 * A client-side tool that the voice agent can call.
 *
 * Tools let an agent run custom functions in the client environment, such as
 * retrieving user data, making API calls, or performing calculations.
 */
type Tool = {
  /** Unique function name; the agent uses it to identify the tool. */
  function_name: string;

  /** Clear description of what the function does (helps the agent decide when to use it). */
  description: string;

  /** Parameters the function accepts. */
  parameters?: ToolParameter[];

  /** Names of the parameters that are required. */
  required?: string[];

  /** Internal function mapping (used for tool execution). */
  func_map?: Record<string, unknown>;

  /** Implementation invoked when the agent calls this tool; may be sync or async. */
  fn?: (...args: unknown[]) => unknown | Promise<unknown>;
};

/**
 * Configuration options for starting a voice agent conversation.
 *
 * Specifies the agent to use, conversation parameters, voice capabilities,
 * and the client-side tools made available during the conversation.
 */
type StartOptions = {
  /** Unique identifier of the voice agent to start (from the Hamsa dashboard). */
  agentId: string;

  /**
   * Optional parameters passed to the agent to customize the conversation.
   * They can be referenced in agent prompts using {{parameter_name}} syntax.
   * @example { userName: "John", orderNumber: "12345", userTier: "premium" }
   */
  params?: Record<string, unknown>;

  /** Whether voice interactions are enabled. If false, the agent runs in text-only mode. */
  voiceEnablement?: boolean;

  /** Client-side tools the agent can call during the conversation. */
  tools?: Tool[];

  /** Optional user identifier for tracking and analytics. */
  userId?: string;

  /** Force headphones usage on iOS devices when available. */
  preferHeadphonesForIosDevices?: boolean;

  /** Platform-specific connection delays to prevent audio cutoff. */
  connectionDelay?: ConnectionDelays;

  /** Disable the wake lock so the device may sleep during the conversation. */
  disableWakeLock?: boolean;

  /**
   * CSS selector of the container element in which the avatar video is rendered.
   * When provided, the agent's video track (avatar) is attached to that element.
   * @example '#avatar-container'
   * @example '.agent-video-wrapper'
   */
  avatarContainerSelector?: string;

  /**
   * Simple callback that receives agent audio data (Level 1 API - Simplest).
   * Automatically captures agent audio in opus-webm format with 100ms chunks.
   * @example
   * ```typescript
   * onAudioData: (audioData) => {
   *   thirdPartySocket.send(audioData);
   * }
   * ```
   */
  onAudioData?: AudioCaptureCallback;

  /**
   * Advanced audio capture configuration (Level 2 API - More Control).
   * Use this when you need to specify the format, source, or other options.
   *
   * NOTE(review): this example names the handler `onData`, whereas the
   * `enableAudioCapture` documentation in this file uses `callback` -
   * confirm the field name against `AudioCaptureOptions`.
   * @example
   * ```typescript
   * captureAudio: {
   *   source: 'both',
   *   format: 'pcm-f32',
   *   onData: (audioData, metadata) => {
   *     processAudio(audioData, metadata);
   *   }
   * }
   * ```
   */
  captureAudio?: AudioCaptureOptions;
};

/**
 * Response format for job details from the Hamsa API.
 *
 * Returned by the getJobDetails() method to check conversation completion
 * status and retrieve additional job metadata.
 */
type JobDetails = {
  /** Current status of the job (e.g., 'COMPLETED', 'IN_PROGRESS', 'FAILED'). */
  status: string;

  /** Any additional job properties the API may return. */
  [key: string]: unknown;
};

/**
 * Payload passed to `callStarted` event handlers.
 */
type CallStartedData = {
  /** Unique job/call ID for this conversation session. */
  jobId: string;
};

/**
 * Event handler signatures for HamsaVoiceAgent.
 *
 * Maps every event emitted by HamsaVoiceAgent to its exact handler signature,
 * giving full type safety and IntelliSense support when using the event
 * emitter API.
 *
 * @example
 * ```typescript
 * // Fully type-safe event handlers
 * agent.on('transcriptionReceived', (text: string) => {
 *   console.log('User said:', text);
 * });
 *
 * agent.on('connectionQualityChanged', ({ quality, metrics }) => {
 *   if (quality === 'poor') {
 *     showNetworkWarning(metrics);
 *   }
 * });
 *
 * agent.on('callStarted', ({ jobId }) => {
 *   console.log('Call started with ID:', jobId);
 *   analytics.trackCall(jobId);
 * });
 * ```
 */
type HamsaVoiceAgentEvents = {
  /** Connection is established (before the call fully starts). */
  start: () => void;

  /** Call is fully started and ready; carries the conversation details. */
  callStarted: (data: CallStartedData) => void;

  /** Call ended (user or agent initiated). */
  callEnded: () => void;

  /** Call was paused. */
  callPaused: () => void;

  /** Call was resumed. */
  callResumed: () => void;

  /** Connection was closed. */
  closed: () => void;

  /** A reconnection attempt is in progress. */
  reconnecting: () => void;

  /** Reconnection succeeded. */
  reconnected: () => void;

  /** User speech was transcribed. */
  transcriptionReceived: (text: string) => void;

  /** An agent response was received. */
  answerReceived: (text: string) => void;

  /** Agent started speaking. */
  speaking: () => void;

  /** Agent is listening. */
  listening: () => void;

  /** Agent state changed (idle, initializing, listening, thinking, speaking). */
  agentStateChanged: (state: AgentState) => void;

  /** A DTMF digit was successfully sent. */
  dtmfSent: (digit: DTMFDigit) => void;

  /** An error occurred. */
  error: (error: Error | HamsaApiError) => void;

  /** A remote track was subscribed. */
  trackSubscribed: (data: TrackSubscriptionData) => void;

  /** A remote track was unsubscribed. */
  trackUnsubscribed: (data: TrackUnsubscriptionData) => void;

  /** A local track was published. */
  localTrackPublished: (data: {
    track?: LocalTrack;
    publication: LocalTrackPublication;
  }) => void;

  /** Analytics data was updated. */
  analyticsUpdated: (analytics: CallAnalyticsResult) => void;

  /** Connection quality changed. */
  connectionQualityChanged: (data: ConnectionQualityData) => void;

  /** Connection state changed. */
  connectionStateChanged: (state: ConnectionState) => void;

  /** Audio playback state changed. */
  audioPlaybackChanged: (playing: boolean) => void;

  /** Microphone was muted. */
  micMuted: () => void;

  /** Microphone was unmuted. */
  micUnmuted: () => void;

  /** A participant connected. */
  participantConnected: (participant: RemoteParticipant) => void;

  /** A participant disconnected. */
  participantDisconnected: (participant: RemoteParticipant) => void;

  /** Data was received. */
  dataReceived: (message: Uint8Array, participant: Participant) => void;

  /** A custom event was emitted. */
  customEvent: (eventType: string, eventData: unknown, metadata?: Record<string, unknown>) => void;

  /** Informational message. */
  info: (info: string) => void;

  /** Tools were registered with the agent. */
  toolsRegistered: (tools: Tool[]) => void;

  /** Execution of a client-side tool failed. */
  rpcError: (functionName: string, error: unknown) => void;
};

/** * HamsaVoiceAgent - Main SDK class for voice agent integration * * This class provides the primary interface for integrating Hamsa voice agents * into web applications. It handles authentication, connection management, * conversation lifecycle, analytics, and client-side tool execution. * * Note: This class uses declaration merging with an interface (defined below) * to provide type-safe event handlers. This is intentional and safe. * * Key features: * - Real-time voice communication with AI agents * - Comprehensive analytics and quality monitoring * - Client-side tool integration for extended functionality * - Automatic screen wake lock management during calls * - Event-driven architecture for reactive applications * - Built-in error handling and reconnection logic * * @example Basic Usage * ```typescript * import { HamsaVoiceAgent } from '@hamsa-ai/voice-agents-sdk'; * * const agent = new HamsaVoiceAgent('your_api_key'); * * // Listen for events * agent.on('callStarted', () => console.log('Call started')); * agent.on('answerReceived', (text) => console.log('Agent said:', text)); * agent.on('transcriptionReceived', (text) => console.log('User said:', text)); * * // Start conversation * await agent.start({ * agentId: 'your_agent_id', * voiceEnablement: true, * params: { userName: 'John', context: 'support_inquiry' } * }); * ``` * * @example With Client-side Tools * ```typescript * const weatherTool = { * function_name: 'getCurrentWeather', * description: 'Gets current weather for a location', * parameters: [ * { name: 'location', type: 'string', description: 'City name' } * ], * required: ['location'], * fn: async (location) => { * const response = await fetch(`/api/weather?city=${location}`); * return response.json(); * } * }; * * await agent.start({ * agentId: 'weather_agent_id', * tools: [weatherTool], * 
voiceEnablement: true * }); * ``` * * @example Analytics Monitoring * ```typescript * // Real-time quality monitoring * agent.on('connectionQualityChanged', ({ quality, metrics }) => { * if (quality === 'poor') { * showNetworkWarning(); * } * }); * * // Periodic analytics updates * agent.on('analyticsUpdated', (analytics) => { * updateDashboard({ * duration: analytics.performanceMetrics.callDuration, * quality: analytics.connectionStats.quality, * latency: analytics.connectionStats.latency * }); * }); * * // Get analytics snapshot anytime * const analytics = agent.getCallAnalytics(); * ``` * * @example Track-based Audio Processing * ```typescript * // Handle incoming audio tracks from voice agent * agent.on('trackSubscribed', ({ track, publication, participant }) => { * if (track.kind === 'audio') { * // Option 1: Attach to DOM element (LiveKit way) * track.attach(audioElement); * * // Option 2: Create MediaStream for custom processing * const stream = new MediaStream([track.mediaStreamTrack]); * const audioContext = new AudioContext(); * const source = audioContext.createMediaStreamSource(stream); * // Add custom audio processing... 
* } * }); * * // Handle local audio track availability * agent.on('localTrackPublished', ({ track, publication }) => { * if (track && track.source === 'microphone') { * // Access local microphone track for recording/analysis * const stream = new MediaStream([track.mediaStreamTrack]); * setupVoiceAnalyzer(stream); * } * }); * ``` */ declare class HamsaVoiceAgent extends EventEmitter { #private; /** Default fallback output volume when not connected */ private static readonly DEFAULT_OUTPUT_VOLUME; /** Default fallback input volume when not connected */ private static readonly DEFAULT_INPUT_VOLUME; /** Internal LiveKit manager instance for WebRTC communication */ liveKitManager: LiveKitManager | null; /** Hamsa API key for authentication */ apiKey: string; /** Base URL for Hamsa API endpoints */ API_URL: string; /** LiveKit RTC WebSocket URL */ LIVEKIT_URL: string; /** Enable debug logging for troubleshooting */ debug: boolean; /** Job ID for tracking conversation completion status */ jobId: string | null; /** Screen wake lock manager to prevent device sleep during calls */ wakeLockManager: ScreenWakeLock; /** Flag to track if the user initiated the call end to prevent duplicate disconnection logic */ private userInitiatedEnd; /** Debug logger instance for conditional logging */ private readonly logger; /** * Creates a new HamsaVoiceAgent instance * * @param apiKey - Your Hamsa API key (get from https://dashboard.tryhamsa.com) * @param config - Optional configuration settings * @param config.region - Deployment region ('eu' | 'uae'). Defaults to 'eu'. * @param config.API_URL - Custom API endpoint URL. Overrides the region default. * @param config.LIVEKIT_URL - Custom LiveKit RTC URL. Overrides the region default. 
* * @example * ```typescript * // Using default region (EU) * const agent = new HamsaVoiceAgent('hamsa_api_key_here'); * * // Using UAE region * const agent = new HamsaVoiceAgent('hamsa_api_key_here', { region: 'uae' }); * * // Using custom endpoints (overrides region) * const agent = new HamsaVoiceAgent('hamsa_api_key_here', { * API_URL: 'https://custom-api.example.com', * LIVEKIT_URL: 'wss://custom-rtc.example.com' * }); * ``` * * @throws {Error} If apiKey is not provided or invalid */ constructor(apiKey: string, { region, API_URL, LIVEKIT_URL, debug, }?: HamsaVoiceAgentConfig); /** * Adjusts the volume level for voice agent audio playback * * Controls the volume of the voice agent's speech output. This affects * all audio playback from the agent but does not change the user's * microphone input level. * * @param volume - Volume level between 0.0 (muted) and 1.0 (full volume) * * @example * ```typescript * // Set to half volume * agent.setVolume(0.5); * * // Mute agent completely * agent.setVolume(0); * * // Full volume * agent.setVolume(1.0); * * // Can be called during active conversation * agent.on('callStarted', () => { * agent.setVolume(0.8); // Slightly quieter * }); * ``` */ setVolume(volume: number): void; /** * Gets the current output volume level * * Returns the current volume setting for voice agent audio playback. * This represents the playback volume for all voice agent audio streams. * * @returns Current output volume level (0.0 = muted, 1.0 = full volume) * * @example * ```typescript * const currentVolume = agent.getOutputVolume(); * console.log(`Volume: ${Math.round(currentVolume * 100)}%`); * ``` */ getOutputVolume(): number; /** * Gets the current job/call ID for this conversation * * The job ID uniquely identifies this conversation session and can be used * to track the conversation, retrieve analytics, or poll for completion status * using the getJobDetails() method. 
* * @returns The job/call ID, or null if conversation hasn't started yet * * @example * ```typescript * // Get job ID when call starts * agent.on('callStarted', () => { * const callId = agent.getJobId(); * console.log('Call ID:', callId); * * // Send to analytics service * analytics.trackCall(callId); * * // Store for later reference * localStorage.setItem('lastCallId', callId); * }); * * // Use job ID to check completion status * agent.on('callEnded', async () => { * const jobId = agent.getJobId(); * if (jobId) { * const details = await agent.getJobDetails(); * console.log('Call completed:', details); * } * }); * ``` */ getJobId(): string | null; /** * Gets the current input volume level from the user's microphone * * Returns the current microphone input level for voice activity detection. * Can be used to create visual feedback for user speaking indicators. * * @returns Current input volume level (0.0 = no input, 1.0 = maximum input) * * @example * ```typescript * // Create voice activity indicator * setInterval(() => { * const inputLevel = agent.getInputVolume(); * updateMicrophoneIndicator(inputLevel); * }, 100); * ``` */ getInputVolume(): number; /** * Mutes or unmutes the user's microphone * * Controls the user's microphone input to the voice agent conversation. * When muted, the user's voice will not be transmitted to the agent. 
* * @param muted - True to mute microphone, false to unmute * * @example * ```typescript * // Mute microphone * agent.setMicMuted(true); * * // Toggle microphone * const isMuted = agent.isMicMuted(); * agent.setMicMuted(!isMuted); * ``` */ setMicMuted(muted: boolean): void; /** * Checks if the user's microphone is currently muted * * @returns True if microphone is muted, false if unmuted * * @example * ```typescript * if (agent.isMicMuted()) { * showUnmutePrompt(); * } * ``` */ isMicMuted(): boolean; /** * @internal * Notifies the agent about user activity * * Prevents the agent from interrupting when the user is actively interacting * with the interface. The agent will not attempt to speak for at least 2 seconds * after user activity is detected. * * @example * ```typescript * // Prevent interruptions while user is typing * textInput.addEventListener('input', () => { * agent.sendUserActivity(); * }); * * // Prevent interruptions during UI interactions * document.addEventListener('click', () => { * agent.sendUserActivity(); * }); * ``` */ sendUserActivity(): void; /** * @internal * Sends a contextual update to the agent * * Informs the agent about user actions or state changes that are not direct * conversation messages but may influence the agent's responses. Unlike regular * messages, contextual updates don't trigger the agent to take its turn in * the conversation. * * @param context - Contextual information to send to the agent * * @example * ```typescript * // Inform agent about navigation * agent.sendContextualUpdate("User navigated to checkout page"); * * // Inform about app state changes * agent.sendContextualUpdate("User's cart total: $127.50"); * * // Inform about user preferences * agent.sendContextualUpdate("User selected dark mode theme"); * ``` */ sendContextualUpdate(context: string): void; /** * Sends a DTMF (Dual-Tone Multi-Frequency) digit to the voice agent * * Simulates pressing a key on a phone keypad during the call. 
This enables * browser-based call testing with DTMF input simulation, allowing users to * test IVR flows and DTMF transitions without making actual phone calls. * * Uses LiveKit's native DTMF API (publishDtmf) which properly integrates with * telephony systems and SIP infrastructure per RFC 4733 standard. * * @param digit - A single DTMF digit: '0'-'9', '*', or '#' * @throws {Error} If called when not connected (no active call) * @throws {Error} If the digit is not a valid DTMF character * @fires dtmfSent When a DTMF digit is successfully sent to the agent * * @example Basic usage * ```typescript * const agent = new HamsaVoiceAgent(apiKey, config); * await agent.start({ agentId, params }); * * // Listen for DTMF send confirmations * agent.on('dtmfSent', (digit) => { * console.log(`Sent DTMF digit: ${digit}`); * highlightKeypadButton(digit); * }); * * // Later, when user presses a key on the UI keypad: * agent.sendDTMF('1'); // Simulates pressing "1" * agent.sendDTMF('*'); // Simulates pressing "*" * agent.sendDTMF('#'); // Simulates pressing "#" * ``` * * @example With UI keypad * ```typescript * // Create keypad buttons * const digits = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '*', '0', '#']; * * digits.forEach(digit => { * const button = document.createElement('button'); * button.textContent = digit; * button.onclick = () => { * try { * agent.sendDTMF(digit); * playKeyTone(digit); // Optional: play local tone feedback * } catch (error) { * console.error('Failed to send DTMF:', error.message); * } * }; * keypadContainer.appendChild(button); * }); * ``` * * @example Error handling * ```typescript * try { * agent.sendDTMF('1'); * } catch (error) { * if (error.message.includes('not connected')) { * showConnectionError(); * } else if (error.message.includes('Invalid DTMF')) { * showInvalidInputError(); * } * } * ``` */ sendDTMF(digit: DTMFDigit): void; /** * Gets frequency data from the user's microphone input * * Returns frequency domain data for audio 
visualization and analysis. * Can be used to create voice activity indicators, audio visualizers, * or advanced voice processing features. * * @returns Uint8Array containing frequency data (0-255 per frequency bin) * * @example * ```typescript * // Create simple audio visualizer * function updateVisualizer() { * const frequencyData = agent.getInputByteFrequencyData(); * const average = frequencyData.reduce((a, b) => a + b) / frequencyData.length; * const percentage = Math.round((average / 255) * 100); * document.getElementById('micLevel').style.width = `${percentage}%`; * } * setInterval(updateVisualizer, 50); * ``` */ getInputByteFrequencyData(): Uint8Array; /** * Gets frequency data from the agent's audio output * * Returns frequency domain data from the agent's voice for analysis * and visualization. Useful for creating voice characteristic displays * or audio processing features. * * @returns Uint8Array containing frequency data (0-255 per frequency bin) * * @example * ```typescript * // Analyze agent voice characteristics * agent.on('speaking', () => { * const interval = setInterval(() => { * const frequencyData = agent.getOutputByteFrequencyData(); * const dominantFreq = findDominantFrequency(frequencyData); * updateVoiceAnalysis(dominantFreq); * }, 100); * * agent.once('listening', () => clearInterval(interval)); * }); * ``` */ getOutputByteFrequencyData(): Uint8Array; /** * Enables real-time audio capture from the conversation * * This method allows clients to receive raw audio data from the agent, user, or both, * enabling use cases like forwarding audio to third-party services, custom recording, * real-time transcription, or audio analysis. 
* * The audio can be captured in three formats: * - `opus-webm`: Efficient Opus codec in WebM container (default, recommended) * - `pcm-f32`: Raw PCM audio as Float32Array for advanced processing * - `pcm-i16`: Raw PCM audio as Int16Array for compatibility * * @param options - Configuration options for audio capture * @param options.source - Which audio to capture: 'agent' (default), 'user', or 'both' * @param options.format - Audio format to receive (default: 'opus-webm') * @param options.chunkSize - Chunk size in milliseconds for encoded formats (default: 100ms) * @param options.bufferSize - Buffer size in samples for PCM formats (default: 4096) * @param options.callback - Function called with each audio chunk * * @example Forward agent audio to third-party service * ```typescript * // Start capturing agent audio when call begins * agent.on('callStarted', () => { * agent.enableAudioCapture({ * source: 'agent', * format: 'opus-webm', * chunkSize: 100, // 100ms chunks * callback: (audioData, metadata) => { * console.log(`Audio from ${metadata.participant}: ${audioData.byteLength} bytes`); * * // Send to third-party service via WebSocket * thirdPartyWebSocket.send(audioData); * * // Or via HTTP * fetch('https://api.example.com/audio', { * method: 'POST', * body: audioData, * headers: { * 'Content-Type': 'audio/webm', * 'X-Participant': metadata.participant, * 'X-Timestamp': metadata.timestamp.toString() * } * }); * } * }); * }); * ``` * * @example Capture both agent and user for custom processing * ```typescript * agent.enableAudioCapture({ * source: 'both', * format: 'pcm-f32', * bufferSize: 4096, * callback: (audioData, metadata) => { * const samples = audioData as Float32Array; * * if (metadata.source === 'agent') { * // Process agent audio * analyzeAgentVoice(samples, metadata.sampleRate); * } else { * // Process user audio * analyzeUserVoice(samples, metadata.sampleRate); * } * * // Save to custom recorder * customRecorder.addAudioChunk({ * source: 
metadata.source, * samples, * sampleRate: metadata.sampleRate, * timestamp: metadata.timestamp * }); * } * }); * ``` * * @example Real-time transcription integration * ```typescript * import { AudioCaptureOptions } from '@hamsa-ai/voice-agents-sdk'; * * const transcriptionService = new WebSocket('wss://transcription.example.com'); * * agent.enableAudioCapture({ * source: 'user', * format: 'opus-webm', * chunkSize: 50, // Lower latency for real-time transcription * callback: (audioData, metadata) => { * // Forward user audio to transcription service * transcriptionService.send(JSON.stringify({ * audio: Array.from(new Uint8Array(audioData as ArrayBuffer)), * timestamp: metadata.timestamp, * sampleRate: metadata.sampleRate * })); * } * }); * * transcriptionService.onmessage = (event) => { * const transcription = JSON.parse(event.data); * console.log('Real-time transcription:', transcription.text); * displayUserSpeech(transcription.text); * }; * ``` */ enableAudioCapture(options: AudioCaptureOptions): void; /** * Disables audio capture and releases all capture resources * * Stops all active audio capture, cleans up MediaRecorders and audio processors, * and releases associated resources. Safe to call even if audio capture is not enabled. * * @example * ```typescript * // Stop capturing when call ends * agent.on('callEnded', () => { * agent.disableAudioCapture(); * console.log('Audio capture stopped'); * }); * * // Or stop manually * stopCaptureButton.addEventListener('click', () => { * agent.disableAudioCapture(); * }); * ``` */ disableAudioCapture(): void; /** * Initiates a new voice agent conversation * * This is the primary method for starting interactions with a voice agent. * It handles authentication, connection establishment, tool registration, * and event forwarding. The method is asynchronous and will emit events * to indicate connection status and conversation progress. 
* * @param options - Configuration options for the conversation * @param options.agentId - Unique identifier of the voice agent (from Hamsa dashboard) * @param options.params - Parameters to customize the conversation context * @param options.voiceEnablement - Enable voice interactions (default: false for text-only) * @param options.tools - Client-side tools available to the agent * * @throws {Error} Authentication failures, network errors, or invalid configuration * * @example Basic voice conversation * ```typescript * try { * await agent.start({ * agentId: 'agent_12345', * voiceEnablement: true, * params: { * userName: 'Alice', * userTier: 'premium', * sessionContext: 'product_support' * } * }); * console.log('Voice agent conversation started'); * } catch (error) { * console.error('Failed to start conversation:', error); * } * ``` * * @example With custom tools * ```typescript * const customerDataTool = { * function_name: 'getCustomerData', * description: 'Retrieves customer account information', * parameters: [ * { name: 'customerId', type: 'string', description: 'Customer ID' } * ], * required: ['customerId'], * fn: async (customerId) => { * return await customerAPI.getProfile(customerId); * } * }; * * await agent.start({ * agentId: 'support_agent', * voiceEnablement: true, * tools: [customerDataTool], * params: { department: 'billing' } * }); * ``` * * @example Event handling * ```typescript * // Set up event listeners before starting * agent.on('callStarted', () => { * console.log('Conversation began'); * startRecordingMetrics(); * }); * * agent.on('error', (error) => { * console.error('Conversation error:', error); * handleConversationError(error); * }); * * await agent.start({ agentId: 'my_agent', voiceEnablement: true }); * ``` */ start({ agentId, params, voiceEnablement, tools, userId: _userId, preferHeadphonesForIosDevices: _preferHeadphonesForIosDevices, connectionDelay: _connectionDelay, disableWakeLock: _disableWakeLock, onAudioData, captureAudio, 
avatarContainerSelector, }: StartOptions): Promise<void>;

    /**
     * Terminates the current voice agent conversation
     *
     * Safely ends the conversation, disconnects from the WebRTC session,
     * releases system resources (including screen wake lock), and performs
     * cleanup. This method should be called when the conversation is complete.
     *
     * @example
     * ```typescript
     * // End conversation when user clicks hang up
     * hangupButton.addEventListener('click', () => {
     *   agent.end();
     * });
     *
     * // End conversation after timeout
     * setTimeout(() => {
     *   agent.end();
     *   console.log('Conversation ended due to timeout');
     * }, 300000); // 5 minutes
     *
     * // Listen for end event
     * agent.on('callEnded', () => {
     *   console.log('Conversation terminated');
     *   updateUI('disconnected');
     *   saveConversationSummary();
     * });
     * ```
     */
    end(): void;

    /**
     * Temporarily pauses the voice agent conversation
     *
     * Pauses audio transmission and reception while maintaining the underlying
     * connection. The conversation can be resumed later using resume(). This
     * is useful for temporary interruptions without ending the entire session.
     *
     * @example
     * ```typescript
     * // Pause when user needs to take another call
     * pauseButton.addEventListener('click', () => {
     *   agent.pause();
     *   console.log('Conversation paused');
     * });
     *
     * // Auto-pause after period of silence
     * let silenceTimeout: ReturnType<typeof setTimeout> | undefined;
     * agent.on('listening', () => {
     *   // Drop any pending timer so repeated 'listening' events do not stack timeouts
     *   clearTimeout(silenceTimeout);
     *   silenceTimeout = setTimeout(() => {
     *     agent.pause();
     *     showResumePrompt();
     *   }, 60000); // 1 minute of silence
     * });
     *
     * agent.on('speaking', () => {
     *   clearTimeout(silenceTimeout);
     * });
     *
     * // Listen for pause event
     * agent.on('callPaused', () => {
     *   showPausedIndicator();
     *   disableMicrophone();
     * });
     * ```
     */
    pause(): void;

    /**
     * Resumes a paused voice agent conversation
     *
     * Restores audio transmission and reception, continuing the conversation
     * from where it was paused. Re-acquires screen wake lock to prevent
     * device sleep during active conversation.
     *
     * @example
     * ```typescript
     * // Resume when user is ready to continue
     * resumeButton.addEventListener('click', () => {
     *   agent.resume();
     *   console.log('Conversation resumed');
     * });
     *
     * // Resume automatically after user interaction
     * document.addEventListener('click', () => {
     *   if (agent.isPaused) {
     *     agent.resume();
     *   }
     * });
     *
     * // Listen for resume event
     * agent.on('callResumed', () => {
     *   hidePausedIndicator();
     *   enableMicrophone();
     *   showActiveIndicator();
     * });
     * ```
     */
    resume(): void;

    /**
     * Retrieves job details from the Hamsa API using the stored jobId.
     * Implements retry logic with exponential backoff.
     *
     * @param maxRetries - Maximum number of retry attempts.
     * @param initialRetryInterval - Initial delay between retries in milliseconds.
     * @param backoffFactor - Factor by which the retry interval increases each attempt.
     * @returns Promise resolving to the job details object.
     */
    getJobDetails(maxRetries?: number, initialRetryInterval?: number, backoffFactor?: number): Promise<JobDetails>;

    /**
     * Retrieves current network connection statistics and quality metrics
     *
     * @returns Connection statistics object or null if not connected
     *
     * @example
     * ```typescript
     * const stats = agent.getConnectionStats();
     * if (stats) {
     *   console.log(`Latency: ${stats.latency}ms`);
     *   console.log(`Quality: ${stats.quality}`);
     *   console.log(`Packet Loss: ${stats.packetLoss}%`);
     *
     *   // Show network warning for poor quality
     *   if (stats.quality === 'poor') {
     *     showNetworkWarning(stats);
     *   }
     * }
     * ```
     */
    getConnectionStats(): ConnectionStatsResult | null;

    /**
     * Retrieves current audio levels and quality metrics for both user and agent
     *
     * @returns Audio metrics object or null if not connected
     *
     * @example
     * ```typescript
     * const audio = agent.getAudioLevels();
     * if (audio) {
     *   // Update UI audio level meters
     *   updateAudioMeter('user', audio.userAudioLevel);
     *   updateAudioMeter('agent', audio.agentAudioLevel);
     *
     *   // Display speaking time statistics
     *   const userMinutes = Math.floor(audio.userSpeakingTime / 60000);
     *   const agentMinutes = Math.floor(audio.agentSpeakingTime / 60000);
     *   console.log(`User spoke for ${userMinutes} minutes`);
     *   console.log(`Agent spoke for ${agentMinutes} minutes`);
     * }
     * ```
     */
    getAudioLevels(): AudioLevelsResult | null;

    /**
     * Retrieves current performance metrics including response times and call duration
     *
     * @returns Performance metrics object or null if not connected
     *
     * @example
     * ```typescript
     * const perf = agent.getPerformanceMetrics();
     * if (perf) {
     *   // Monitor response time for quality assurance
     *   if (perf.responseTime > 3000) {
     *     console.warn('High response time:', perf.responseTime + 'ms');
     *   }
     *
     *   // Display call duration
     *   const minutes = Math.floor(perf.callDuration / 60000);
     *   const seconds = Math.floor((perf.callDuration % 60000) / 1000);
     *   updateTimer(`${minutes}:${seconds.toString().padStart(2, '0')}`);
     * }
     * ```
     */
    getPerformanceMetrics(): PerformanceMetricsResult | null;

    /**
     * Retrieves information about all participants in the conversation
     *
     * @returns Array of participant data objects (empty array if not connected)
     *
     * @example
     * ```typescript
     * const participants = agent.getParticipants();
     *
     * participants.forEach(participant => {
     *   console.log(`Participant: ${participant.identity}`);
     *   console.log(`Connected: ${new Date(participant.connectionTime)}`);
     *
     *   // Display participant info in UI
     *   if (participant.identity.includes('agent')) {
     *     showAgentStatus('connected', participant.metadata);
     *   }
     * });
     *
     * // Check if agent is present
     * const hasAgent = participants.some(p => p.identity.includes('agent'));
     * ```
     */
    getParticipants(): ParticipantData[];

    /**
     * Retrieves current audio track statistics and stream information
     *
     * @returns Track statistics object or null if not connected
     *
     * @example
     * ```typescript
     * const trackStats = agent.getTrackStats();
     * if (trackStats) {
     *   console.log(`Active tracks: ${trackStats.activeTracks}/${trackStats.totalTracks}`);
     *   console.log(`Audio elements: ${trackStats.audioElements}`);
     *
     *   // Check track health
     *   if (trackStats.activeTracks === 0) {
     *     console.warn('No active audio tracks');
     *     showAudioWarning();
     *   }
     * }
     * ```
     */
    getTrackStats(): TrackStatsResult | null;

    /**
     * Retrieves comprehensive analytics combining all metrics into a single snapshot
     *
     * This is the primary method for accessing complete conversation analytics,
     * combining connection statistics, audio metrics, performance data, participant
     * information, and track statistics into a unified result.
     *
     * @returns Complete analytics object or null if not connected
     *
     * @example
     * ```typescript
     * const analytics = agent.getCallAnalytics();
     * if (analytics) {
     *   // Log comprehensive conversation summary
     *   console.log('=== Conversation Analytics ===');
     *   console.log(`Duration: ${analytics.performanceMetrics.callDuration}ms`);
     *   console.log(`Quality: ${analytics.connectionStats.quality}`);
     *   console.log(`Latency: ${analytics.connectionStats.latency}ms`);
     *   console.log(`Participants: ${analytics.participants.length}`);
     *
     *   // Send to analytics service
     *   analyticsService.recordConversation({
     *     sessionId: generateSessionId(),
     *     agentId: currentAgentId,
     *     timestamp: Date.now(),
     *     metrics: analytics
     *   });
     *
     *   // Check for quality issues
     *   if (analytics.connectionStats.packetLoss > 5) {
     *     reportNetworkIssue(analytics);
     *   }
     * }
     * ```
     */
    getCallAnalytics(): CallAnalyticsResult | null;

    /**
     * Gets the LiveKit Room instance for React SDK integration
     *
     * Provides access to the underlying LiveKit Room object for use with
     * LiveKit React components. This enables integration with the broader
     * LiveKit React ecosystem while maintaining the benefits of the
     * HamsaVoiceAgent abstraction.
     *
     * @internal For use by `@hamsa-ai/voice-agents-react` only
     * @returns LiveKit Room instance or null if not connected
     *
     * @example React SDK Integration
     * ```typescript
     * import { RoomContext } from '@livekit/components-react';
     * import type { Room } from '@hamsa-ai/voice-agents-sdk';
     *
     * function VoiceProvider({ agent, children }) {
     *   const [room, setRoom] = useState<Room | null>(null);
     *
     *   useEffect(() => {
     *     const handleCallStarted = () => setRoom(agent.getRoom());
     *     agent.on('callStarted', handleCallStarted);
     *
     *     // Unregister on unmount or agent change so listeners do not accumulate
     *     return () => {
     *       agent.off('callStarted', handleCallStarted);
     *     };
     *   }, [agent]);
     *
     *   if (!room) return children;
     *
     *   return (
     *     <RoomContext.Provider value={room}>
     *       {children}
     *     </RoomContext.Provider>
     *   );
     * }
     * ```
     */
    getRoom(): Room | null;

    /**
     * Gets the remote audio track from the voice agent for visualization
     *
     * Provides access to the agent's audio track for use with LiveKit React
     * visualization components like BarVisualizer. Returns undefined if not
     * connected or no remote audio track is available.
     *
     * @returns The agent's audio track, or undefined if not available
     *
     * @example With LiveKit BarVisualizer
     * ```typescript
     * import { BarVisualizer } from '@livekit/components-react';
     *
     * function AgentVisualizer({ agent }) {
     *   const [audioTrack, setAudioTrack] = useState();
     *
     *   useEffect(() => {
     *     agent.on('trackSubscribed', ({ track }) => {
     *       if (track.kind === 'audio') {
     *         setAudioTrack(track);
     *       }
     *     });
     *   }, [agent]);
     *
     *   if (!audioTrack) return null;
     *
     *   return <BarVisualizer trackRef={{ track: audioTrack, source: 'microphone' }} />;
     * }
     * ```
     */
    getRemoteAudioTrack(): RemoteTrack | undefined;
}

/**
 * Declaration merging: Add type-safe event methods to HamsaVoiceAgent
 *
 * This interface merges with the HamsaVoiceAgent class to provide fully
 * typed event handler methods without requiring explicit type assertions.
 *
 * @example
 * ```typescript
 * const agent = new HamsaVoiceAgent('api_key');
 *
 * // ✅ Fully type-safe - no casting needed!
 * agent.on('transcriptionReceived', (text) => {
 *   console.log(text); // text is inferred as string
 * });
 *
 * // ❌ Type error - wrong event name
 * agent.on('wrongEvent', () => {});
 *
 * // ❌ Type error - wrong handler signature
 * agent.on('transcriptionReceived', (text: number) => {}); // text is a string
 *
 * // ✅ Allowed - TypeScript lets a handler declare fewer parameters than the event provides
 * agent.on('transcriptionReceived', () => {});
 * ```
 */
interface HamsaVoiceAgent {
    /**
     * Registers an event listener with type-safe event names and handlers
     */
    on<K extends keyof HamsaVoiceAgentEvents>(event: K, listener: HamsaVoiceAgentEvents[K]): this;

    /**
     * Removes an event listener with type-safe event names and handlers
     */
    off<K extends keyof HamsaVoiceAgentEvents>(event: K, listener: HamsaVoiceAgentEvents[K]): this;

    /**
     * Registers a one-time event listener with type-safe event names and handlers
     */
    once<K extends keyof HamsaVoiceAgentEvents>(event: K, listener: HamsaVoiceAgentEvents[K]): this;

    /**
     * Emits an event with type-safe event names and arguments
     */
    emit<K extends keyof HamsaVoiceAgentEvents>(event: K, ...args: Parameters<HamsaVoiceAgentEvents[K]>): boolean;
}

// Runtime exports: the agent class (also the default export) and the API error class.
export { HamsaVoiceAgent, HamsaApiError };
export default HamsaVoiceAgent;

// Type-only re-exports from livekit-client.
export type { LocalTrack, RemoteParticipant, RemoteTrack, RemoteTrackPublication, Room, } from 'livekit-client';

// Type-only re-exports of the analytics result shapes from the LiveKit manager module.
export type { AudioLevelsResult, CallAnalyticsResult, ConnectionStatsResult, ParticipantData, PerformanceMetricsResult, TrackStatsResult, } from './classes/livekit-manager';

// Type-only exports of names already in scope in this module.
export type { CallStartedData, HamsaVoiceAgentEvents, StartOptions, Tool, JobDetails, };