// @hamsa-ai/voice-agents-sdk
// Version:
// Hamsa AI - Voice Agents JavaScript SDK
// 416 lines (415 loc) • 15.1 kB
// TypeScript
/**
* Shared types and interfaces for LiveKit modules
*/
import type { ConnectionQuality, Track, TrackPublication } from 'livekit-client';
/**
 * Current state of the voice agent, using the state names defined by LiveKit.
 * A value of this type is always one of the five literals below.
 */
export type AgentState =
  | 'idle'
  | 'initializing'
  | 'listening'
  | 'thinking'
  | 'speaking';
/**
 * Signature of a client-side tool implementation that the agent can execute.
 *
 * Takes any number of arguments of unknown type and may hand back its result
 * either directly or through a promise, so synchronous and asynchronous
 * implementations both fit.
 */
export type ToolFunction = (
  ...args: unknown[]
) => unknown | Promise<unknown>;
/**
 * A client-side tool that can be registered with the voice agent.
 * Registered tools are exposed to the agent as RPC methods during conversations.
 */
export type Tool = {
  /** Name of the function the agent uses to call this tool */
  function_name: string;
  /** Implementation executed when the agent calls the tool (may be omitted) */
  fn?: ToolFunction;
};
/**
 * One audio level sample taken at a specific moment.
 * Used to track audio activity and volume during calls.
 */
export type AudioLevel = {
  /** Unix timestamp of the measurement */
  timestamp: number;
  /** Measured level (typically 0.0 to 1.0) */
  level: number;
  /** Identifier of the participant the sample belongs to, when known */
  participant?: string;
};
/**
 * Describes one participant in the voice conversation.
 * Holds the connection details and metadata used for analytics and monitoring.
 */
export type ParticipantData = {
  /** Participant's unique identity (e.g., 'agent', 'user') */
  identity: string;
  /** Session ID that LiveKit assigned to this participant */
  sid: string;
  /** Unix timestamp at which the participant connected */
  connectionTime: number;
  /** Metadata associated with the participant, when present */
  metadata?: string;
};
/**
 * Statistics and metadata recorded for one media track (audio/video stream).
 * Follows the lifecycle and details of each media stream in the conversation.
 */
export type TrackStatsData = {
  /** Unique identifier of the track */
  trackId: string;
  /** Track type (e.g., 'audio', 'video') */
  kind: string;
  /** Identity of the participant that owns the track */
  participant: string;
  /** Unix timestamp at which the track was subscribed to */
  subscriptionTime: number;
  /** LiveKit track publication object holding the track details */
  publication: TrackPublication;
  /** Source of the track (microphone, screen_share, etc.) */
  source?: string;
  /** Whether the track is currently muted */
  muted?: boolean;
  /** Whether the track is enabled */
  enabled?: boolean;
  /** Width and height, for video tracks */
  dimensions?: { width: number; height: number };
  /** Whether the track uses simulcast */
  simulcasted?: boolean;
};
/**
 * Raw call statistics used to monitor connection health and usage.
 * Covers network counters, participant/track counts, and overall call quality.
 */
export type CallStats = {
  /** Connection attempts made during this session */
  connectionAttempts: number;
  /** Reconnection attempts caused by network issues */
  reconnectionAttempts: number;
  /** Total bytes received during the call */
  totalBytesReceived: number;
  /** Total bytes sent during the call */
  totalBytesSent: number;
  /** Number of network packets lost */
  packetsLost: number;
  /** Participants currently in the conversation */
  participantCount: number;
  /** Media tracks currently active */
  trackCount: number;
  /** History of audio level measurements */
  audioLevels: Array<AudioLevel>;
  /** Current overall connection quality assessment (typed as a plain string) */
  connectionQuality: string;
};
/**
 * Real-time network connection quality metrics.
 * Internal structure; the numeric fields are estimates kept for future use.
 */
export type ConnectionMetrics = {
  /** Current network latency in milliseconds (internal - estimated) */
  latency: number;
  /** Packet loss percentage (internal - estimated) */
  packetLoss: number;
  /** Current bandwidth usage in bytes per second (internal - estimated) */
  bandwidth: number;
  /**
   * Qualitative assessment of the connection ('excellent', 'good', 'poor', 'lost').
   * Typed as a plain string, so the compiler does not restrict it to those values.
   */
  quality: string;
  /** Network jitter in milliseconds (internal - estimated) */
  jitter: number;
};
/**
 * Audio quality and usage metrics covering both the user and the agent.
 * Tracks speaking time, audio levels, and quality indicators.
 */
export type AudioMetrics = {
  /** Current audio input level from the user (0.0 to 1.0) */
  userAudioLevel: number;
  /** Current audio output level from the agent (0.0 to 1.0) */
  agentAudioLevel: number;
  /** Total time the user has been speaking, in milliseconds */
  userSpeakingTime: number;
  /** Total time the agent has been speaking, in milliseconds */
  agentSpeakingTime: number;
  /** Count of audio interruptions or dropouts detected */
  audioDropouts: number;
  /** Whether echo cancellation is currently active */
  echoCancellationActive: boolean;
};
/**
 * Performance metrics covering response times and connection reliability.
 * Internal structure; it also carries an estimated network latency for future use.
 */
export type PerformanceMetrics = {
  /** Total response time for agent interactions, in milliseconds */
  responseTime: number;
  /** Current network latency in milliseconds (internal - estimated) */
  networkLatency: number;
  /** Time taken to establish the initial connection, in milliseconds */
  connectionEstablishedTime: number;
  /** Total number of reconnections that have occurred */
  reconnectionCount: number;
};
/**
 * Connection statistics returned by getConnectionStats().
 * Contains verified data only; no estimated metrics are exposed to customers.
 */
export type ConnectionStatsResult = {
  /** Current connection quality assessment from LiveKit */
  quality: string;
  /** Total connection attempts made */
  connectionAttempts: number;
  /** Total reconnection attempts made */
  reconnectionAttempts: number;
  /** Time taken to establish the connection, in milliseconds */
  connectionEstablishedTime: number;
  /** Whether a connection to the voice agent is currently established */
  isConnected: boolean;
};
/**
 * Audio metrics returned by getAudioLevels().
 * Combines audio quality and usage figures with the current real-time levels
 * and, optionally, the pause state and volume.
 */
export type AudioLevelsResult = {
  /** User's audio input level (0.0 to 1.0) */
  userAudioLevel: number;
  /** Agent's audio output level (0.0 to 1.0) */
  agentAudioLevel: number;
  /** Total user speaking time, in milliseconds */
  userSpeakingTime: number;
  /** Total agent speaking time, in milliseconds */
  agentSpeakingTime: number;
  /** Number of audio dropouts detected */
  audioDropouts: number;
  /** Whether echo cancellation is active */
  echoCancellationActive: boolean;
  /** Current real-time audio level of the user */
  currentUserLevel: number;
  /** Current real-time audio level of the agent */
  currentAgentLevel: number;
  /** Whether audio is currently paused (optional) */
  isPaused?: boolean;
  /** Current volume level (optional) */
  volume?: number;
};
/**
 * Performance metrics returned by getPerformanceMetrics().
 * Contains verified performance data only; no estimated metrics are exposed.
 */
export type PerformanceMetricsResult = {
  /** Total response time, in milliseconds */
  responseTime: number;
  /** Connection establishment time, in milliseconds */
  connectionEstablishedTime: number;
  /** Total reconnection count */
  reconnectionCount: number;
  /** Total call duration, in milliseconds */
  callDuration: number;
  /** Average response time, in milliseconds */
  averageResponseTime: number;
};
/**
 * Track statistics returned by getTrackStats().
 * Summarises track counts and lists per-track details.
 */
export type TrackStatsResult = {
  /** Total number of tracks ever created */
  totalTracks: number;
  /** Number of tracks currently active */
  activeTracks: number;
  /** Number of audio elements currently active */
  audioElements: number;
  /** Per-track statistics as [trackId, trackData] pairs */
  trackDetails: Array<[string, TrackStatsData]>;
};
/**
 * Analytics report returned by getCallAnalytics().
 * Bundles every analytics section into a single object.
 */
export type CallAnalyticsResult = {
  /** Connection statistics and quality metrics */
  connectionStats: ConnectionStatsResult;
  /** Audio quality and usage metrics */
  audioMetrics: AudioLevelsResult;
  /** Performance metrics and timings */
  performanceMetrics: PerformanceMetricsResult;
  /** Information about the current participants */
  participants: Array<ParticipantData>;
  /** Track statistics and details */
  trackStats: TrackStatsResult;
  /** Raw call statistics */
  callStats: CallStats;
  /** Additional metadata about the call */
  metadata: {
    /** Call start time; the type also allows null */
    callStartTime: number | null;
    /** Whether currently connected */
    isConnected: boolean;
    /** Whether the call is paused */
    isPaused: boolean;
    /** Current volume level */
    volume: number;
  };
};
/**
 * Payload of a connection quality change event.
 * Reports the quality level, the participant it applies to, and the quality
 * string from LiveKit.
 */
export type ConnectionQualityData = {
  /** Current connection quality level (livekit-client `ConnectionQuality`) */
  quality: ConnectionQuality;
  /** Identity of the participant this measurement applies to */
  participant: string;
  /** Connection quality string from LiveKit (estimated metrics available internally) */
  metrics: { quality: string };
};
/**
 * Payload of a track subscription event.
 * Describes an audio/video stream that has just become available.
 */
export type TrackSubscriptionData = {
  /** LiveKit track object that was subscribed to */
  track: Track;
  /** Track publication carrying the track's metadata */
  publication: TrackPublication;
  /** Identity of the participant that owns the track */
  participant: string;
  /** Statistics about this track subscription, when available */
  trackStats?: TrackStatsData;
};
/**
 * Payload of a track unsubscription event.
 * Describes an audio/video stream that is no longer available.
 */
export type TrackUnsubscriptionData = {
  /** LiveKit track object that was unsubscribed from */
  track: Track;
  /** Track publication that was removed */
  publication: TrackPublication;
  /** Identity of the participant that owned the track */
  participant: string;
};
/** Minimal WebAudio types (kept lightweight for cross-env compatibility) */

/**
 * Minimal analyser shape: only the FFT size, the frequency bin count, and the
 * byte frequency readout are declared.
 */
export type MinimalAnalyser = {
  /** FFT size setting of the analyser */
  fftSize: number;
  /** Number of frequency bins */
  frequencyBinCount: number;
  /** Receives a byte array to be filled with frequency data */
  getByteFrequencyData(dataArray: Uint8Array): void;
};
/**
 * Minimal audio node shape: at most an optional `connect` method is declared.
 */
export type MinimalAudioNode = {
  /** Connects this node to a destination; may be absent on an implementation */
  connect?(destination: unknown): void;
};
/**
 * Minimal audio context shape: factory methods for an analyser and for source
 * nodes built from an audio element or a media stream.
 */
export type MinimalAudioContext = {
  /** Creates an analyser */
  createAnalyser(): MinimalAnalyser;
  /** Creates a source node from an HTML audio element */
  createMediaElementSource(element: HTMLAudioElement): MinimalAudioNode;
  /** Creates a source node from a media stream */
  createMediaStreamSource(stream: MediaStream): MinimalAudioNode;
};
/**
 * Metadata delivered alongside custom events from voice agents.
 * Says when the event was generated and who triggered it, and carries the
 * original raw message.
 */
export type CustomEventMetadata = {
  /** Unix timestamp at which the event was generated */
  timestamp: number;
  /** Identity of the participant that triggered the event */
  participant: string;
  /** Original raw message data from LiveKit */
  rawMessage: Record<string, unknown>;
};
/**
 * Audio formats that audio capture can deliver.
 */
export type AudioCaptureFormat =
  | 'opus-webm'
  | 'pcm-f32'
  | 'pcm-i16';
/**
 * Which side of the conversation to capture audio from.
 */
export type AudioCaptureSource =
  | 'agent'
  | 'user'
  | 'both';
/**
 * Metadata that accompanies each captured audio data chunk.
 */
export type AudioCaptureMetadata = {
  /** Identity of the participant the audio comes from */
  participant: string;
  /** Participant type ('agent' or 'user') */
  source: 'agent' | 'user';
  /** Unix timestamp at which the chunk was captured */
  timestamp: number;
  /** ID of the track associated with this audio */
  trackId: string;
  /** Source of this specific track (e.g. 'microphone', 'screen_share') */
  trackSource?: string;
  /** Audio format of the chunk */
  format: AudioCaptureFormat;
  /** Sample rate in Hz (for PCM formats) */
  sampleRate?: number;
  /** Number of channels (typically 1 for mono) */
  channels?: number;
};
/**
 * Callback that receives captured audio data.
 *
 * It is handed the audio chunk (an ArrayBuffer, Float32Array, or Int16Array)
 * together with the metadata describing that chunk; its return value is ignored.
 */
export type AudioCaptureCallback = (
  audioData: ArrayBuffer | Float32Array | Int16Array,
  metadata: AudioCaptureMetadata,
) => void;
/**
 * Options that configure audio capture. Every field is optional.
 */
export type AudioCaptureOptions = {
  /** Which audio to capture (default: 'agent') */
  source?: AudioCaptureSource;
  /**
   * Specific track source to capture (default: 'microphone').
   * Set to 'all' to capture everything including screen share audio.
   */
  trackSourceFilter?: 'microphone' | 'screen_share' | 'all';
  /** Audio format to deliver (default: 'opus-webm') */
  format?: AudioCaptureFormat;
  /** Chunk size in milliseconds for encoded formats (default: 100ms) */
  chunkSize?: number;
  /** Buffer size in samples for PCM formats (default: 4096) */
  bufferSize?: number;
  /** Callback that receives audio data (Level 3 API - Full control) */
  callback?: AudioCaptureCallback;
  /** Simpler callback alias (Level 2 API - recommended for inline usage) */
  onData?: AudioCaptureCallback;
};
/**
 * Structure of the LiveKit access token payload used by the Hamsa backend.
 * Lists the decoded JWT payload fields that are relevant to SDK logic.
 */
export type LiveKitTokenPayload = {
  /** Room name and the join/publish/subscribe permission flags */
  video: {
    room: string;
    roomJoin: boolean;
    canPublish: boolean;
    canPublishData: boolean;
    canSubscribe: boolean;
  };
  /** Room configuration, including the agents attached to the room */
  roomConfig: {
    name: string;
    emptyTimeout: number;
    departureTimeout: number;
    maxParticipants: number;
    minPlayoutDelay: number;
    maxPlayoutDelay: number;
    syncStreams: boolean;
    agents: {
      agentName: string;
      /** JSON string containing jobId, voiceAgentId, apiKey */
      metadata: string;
    }[];
  };
  /** `iss` claim of the token (issuer, in standard JWT naming) */
  iss: string;
  /** `exp` claim of the token (expiration time, in standard JWT naming) */
  exp: number;
  /** `nbf` claim of the token (not-before time, in standard JWT naming) */
  nbf: number;
  /** `sub` claim of the token (subject, in standard JWT naming) */
  sub: string;
};
/**
 * Parsed form of the metadata that is embedded as a JSON string in the
 * LiveKit token payload.
 */
export type LiveKitAgentMetadata = {
  /** Job identifier carried in the metadata */
  jobId: string;
  /** Voice agent identifier carried in the metadata */
  voiceAgentId: string;
  /** API key carried in the metadata */
  apiKey: string;
};
/**
 * Valid DTMF (Dual-Tone Multi-Frequency) digits that can be sent during a call:
 * the digits 0-9 plus the asterisk (*) and pound (#) characters.
 */
export type DTMFDigit =
  | '0'
  | '1'
  | '2'
  | '3'
  | '4'
  | '5'
  | '6'
  | '7'
  | '8'
  | '9'
  | '*'
  | '#';