UNPKG

@anam-ai/js-sdk

Version:

Client side JavaScript SDK for Anam AI

707 lines 33.4 kB
// CommonJS preamble for the StreamingClient module: strict mode, the async helper,
// export bookkeeping, project-local dependencies and a module-level timing constant.
"use strict";
/**
 * __awaiter drives a generator function as a promise-based coroutine.
 * (This looks like the standard helper a TypeScript build emits for async functions
 * when targeting generators; confirm against the build config.)
 *
 * - An existing `this.__awaiter` is reused when one is present.
 * - `generator` is invoked with `thisArg` and `_arguments` (or an empty argument list).
 * - Every yielded value is adopted into a `P` (`Promise` when `P` is not supplied); its
 *   fulfilment resumes the generator through `next`, its rejection through `throw`.
 * - The returned promise resolves with the generator's final value and rejects with
 *   anything the generator throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); };
// Flag the exports object with `__esModule` and pre-declare the export slot, which is
// assigned once the class has been defined further down the file.
Object.defineProperty(exports, "__esModule", { value: true });
exports.StreamingClient = void 0;
// Project-local modules: client metrics helpers, signalling / REST clients, shared
// types and enums, and the two stream wrapper classes.
const ClientMetrics_1 = require("../lib/ClientMetrics");
const modules_1 = require("../modules");
const types_1 = require("../types");
const AgentAudioInputStream_1 = require("../types/AgentAudioInputStream");
const TalkMessageStream_1 = require("../types/TalkMessageStream");
const SUCCESS_METRIC_POLLING_TIMEOUT_MS = 15000; // After this time we will stop polling for the first frame and consider the session a failure.
const STATS_COLLECTION_INTERVAL_MS = 5000; const ICE_CANDIDATE_POOL_SIZE = 2; // Optimisation to speed up connection time class StreamingClient { constructor(sessionId, options, publicEventEmitter, internalEventEmitter) { var _a, _b, _c, _d; this.peerConnection = null; this.connectionReceivedAnswer = false; this.remoteIceCandidateBuffer = []; this.inputAudioStream = null; this.dataChannel = null; this.videoElement = null; this.videoStream = null; this.audioStream = null; this.inputAudioState = { isMuted: false, permissionState: types_1.AudioPermissionState.NOT_REQUESTED, }; this.successMetricPoller = null; this.successMetricFired = false; this.showPeerConnectionStatsReport = false; this.peerConnectionStatsReportOutputFormat = 'console'; this.statsCollectionInterval = null; this.agentAudioInputStream = null; this.publicEventEmitter = publicEventEmitter; this.internalEventEmitter = internalEventEmitter; this.apiGatewayConfig = options.apiGateway; // initialize input audio state const { inputAudio } = options; this.inputAudioState = inputAudio.inputAudioState; if (options.inputAudio.userProvidedMediaStream) { this.inputAudioStream = options.inputAudio.userProvidedMediaStream; } this.disableInputAudio = options.inputAudio.disableInputAudio === true; // register event handlers this.internalEventEmitter.addListener(types_1.InternalEvent.WEB_SOCKET_OPEN, this.onSignallingClientConnected.bind(this)); this.internalEventEmitter.addListener(types_1.InternalEvent.SIGNAL_MESSAGE_RECEIVED, this.onSignalMessage.bind(this)); // set ice servers this.iceServers = options.iceServers; // initialize signalling client this.signallingClient = new modules_1.SignallingClient(sessionId, options.signalling, this.publicEventEmitter, this.internalEventEmitter, this.apiGatewayConfig); // initialize engine API client this.engineApiRestClient = new modules_1.EngineApiRestClient(options.engine.baseUrl, sessionId, this.apiGatewayConfig); this.audioDeviceId = options.inputAudio.audioDeviceId; 
this.showPeerConnectionStatsReport = (_b = (_a = options.metrics) === null || _a === void 0 ? void 0 : _a.showPeerConnectionStatsReport) !== null && _b !== void 0 ? _b : false; this.peerConnectionStatsReportOutputFormat = (_d = (_c = options.metrics) === null || _c === void 0 ? void 0 : _c.peerConnectionStatsReportOutputFormat) !== null && _d !== void 0 ? _d : 'console'; } onInputAudioStateChange(oldState, newState) { // changed microphone mute state if (oldState.isMuted !== newState.isMuted) { if (newState.isMuted) { this.muteAllAudioTracks(); } else { this.unmuteAllAudioTracks(); } } } muteAllAudioTracks() { var _a; (_a = this.inputAudioStream) === null || _a === void 0 ? void 0 : _a.getAudioTracks().forEach((track) => { track.enabled = false; }); } unmuteAllAudioTracks() { var _a; (_a = this.inputAudioStream) === null || _a === void 0 ? void 0 : _a.getAudioTracks().forEach((track) => { track.enabled = true; }); } startStatsCollection() { if (this.statsCollectionInterval) { return; } // Send stats every STATS_COLLECTION_INTERVAL_MS seconds this.statsCollectionInterval = setInterval(() => __awaiter(this, void 0, void 0, function* () { if (!this.peerConnection || !this.dataChannel || this.dataChannel.readyState !== 'open') { return; } try { const stats = yield this.peerConnection.getStats(); this.sendClientSideMetrics(stats); } catch (error) { console.error('Failed to collect and send stats:', error); } }), STATS_COLLECTION_INTERVAL_MS); } sendClientSideMetrics(stats) { stats.forEach((report) => { // Process inbound-rtp stats for both video and audio if (report.type === 'inbound-rtp') { const metrics = { message_type: 'remote_rtp_stats', data: report, }; // Send the metrics via data channel if (this.dataChannel && this.dataChannel.readyState === 'open') { this.dataChannel.send(JSON.stringify(metrics)); } } }); } startSuccessMetricPolling() { if (this.successMetricPoller || this.successMetricFired) { return; } const timeoutId = setTimeout(() => { if 
(this.successMetricPoller) { console.warn('No video frames received, there is a problem with the connection.'); clearInterval(this.successMetricPoller); this.successMetricPoller = null; } }, SUCCESS_METRIC_POLLING_TIMEOUT_MS); this.successMetricPoller = setInterval(() => __awaiter(this, void 0, void 0, function* () { if (!this.peerConnection || this.successMetricFired) { if (this.successMetricPoller) { clearInterval(this.successMetricPoller); } clearTimeout(timeoutId); return; } try { const stats = yield this.peerConnection.getStats(); let videoDetected = false; let detectionMethod = null; stats.forEach((report) => { // Find the report for inbound video if (report.type === 'inbound-rtp' && report.kind === 'video') { // Method 1: Try framesDecoded (most reliable when available) if (report.framesDecoded !== undefined && report.framesDecoded > 0) { videoDetected = true; detectionMethod = 'framesDecoded'; } else if (report.framesReceived !== undefined && report.framesReceived > 0) { videoDetected = true; detectionMethod = 'framesReceived'; } else if (report.bytesReceived > 0 && report.packetsReceived > 0 && // Additional check: ensure we've received enough data for actual video report.bytesReceived > 100000 // rough threshold ) { videoDetected = true; detectionMethod = 'bytesReceived'; } } }); if (videoDetected && !this.successMetricFired) { this.successMetricFired = true; (0, ClientMetrics_1.sendClientMetric)(ClientMetrics_1.ClientMetricMeasurement.CLIENT_METRIC_MEASUREMENT_SESSION_SUCCESS, '1', detectionMethod ? 
{ detectionMethod } : undefined); if (this.successMetricPoller) { clearInterval(this.successMetricPoller); } clearTimeout(timeoutId); this.successMetricPoller = null; } } catch (error) { } }), 500); } muteInputAudio() { const oldAudioState = this.inputAudioState; const newAudioState = Object.assign(Object.assign({}, this.inputAudioState), { isMuted: true }); this.inputAudioState = newAudioState; this.onInputAudioStateChange(oldAudioState, newAudioState); return this.inputAudioState; } unmuteInputAudio() { const oldAudioState = this.inputAudioState; const newAudioState = Object.assign(Object.assign({}, this.inputAudioState), { isMuted: false }); this.inputAudioState = newAudioState; this.onInputAudioStateChange(oldAudioState, newAudioState); return this.inputAudioState; } getInputAudioState() { return this.inputAudioState; } getPeerConnection() { return this.peerConnection; } changeAudioInputDevice(deviceId) { return __awaiter(this, void 0, void 0, function* () { if (!this.peerConnection) { throw new Error('StreamingClient - changeAudioInputDevice: peer connection is not initialized. 
Start streaming first.'); } if (deviceId === null || deviceId === undefined) { throw new Error('StreamingClient - changeAudioInputDevice: deviceId is required'); } // Store the current mute state to preserve it const wasMuted = this.inputAudioState.isMuted; try { // Stop the current audio stream tracks if (this.inputAudioStream) { this.inputAudioStream.getAudioTracks().forEach((track) => { track.stop(); }); } // Request new audio stream with the new device ID const audioConstraints = { echoCancellation: true, deviceId: { exact: deviceId, }, }; this.inputAudioStream = yield navigator.mediaDevices.getUserMedia({ audio: audioConstraints, }); // Update the stored device ID this.audioDeviceId = deviceId; // Replace the audio track in the peer connection yield this.setupAudioTrack(); // Restore the mute state if (wasMuted) { this.muteAllAudioTracks(); } // Emit event to notify that the device has changed this.publicEventEmitter.emit(types_1.AnamEvent.INPUT_AUDIO_DEVICE_CHANGED, deviceId); } catch (error) { console.error('Failed to change audio input device:', error); throw new Error(`StreamingClient - changeAudioInputDevice: ${error instanceof Error ? 
error.message : String(error)}`); } }); } getInputAudioStream() { return this.inputAudioStream; } getVideoStream() { return this.videoStream; } getAudioStream() { return this.audioStream; } sendDataMessage(message) { if (this.dataChannel && this.dataChannel.readyState === 'open') { this.dataChannel.send(message); } } setMediaStreamTargetById(videoElementId) { // set up streaming targets if (videoElementId) { const videoElement = document.getElementById(videoElementId); if (!videoElement) { throw new Error(`StreamingClient: video element with id ${videoElementId} not found`); } this.videoElement = videoElement; } } startConnection() { try { if (this.peerConnection) { console.error('StreamingClient - startConnection: peer connection already exists'); return; } // start the connection this.signallingClient.connect(); } catch (error) { console.error('StreamingClient - startConnection: error', error); this.handleWebrtcFailure(error); } } stopConnection() { return __awaiter(this, void 0, void 0, function* () { yield this.shutdown(); }); } sendTalkCommand(content) { return __awaiter(this, void 0, void 0, function* () { if (!this.peerConnection) { throw new Error('StreamingClient - sendTalkCommand: peer connection is null'); } yield this.engineApiRestClient.sendTalkCommand(content); return; }); } startTalkMessageStream(correlationId) { if (!correlationId) { // generate a random correlation uuid correlationId = Math.random().toString(36).substring(2, 15); } return new TalkMessageStream_1.TalkMessageStream(correlationId, this.internalEventEmitter, this.signallingClient); } createAgentAudioInputStream(config) { this.agentAudioInputStream = new AgentAudioInputStream_1.AgentAudioInputStream(config, this.signallingClient); return this.agentAudioInputStream; } getAgentAudioInputStream() { return this.agentAudioInputStream; } initPeerConnection() { return __awaiter(this, void 0, void 0, function* () { this.peerConnection = new RTCPeerConnection({ iceServers: this.iceServers, 
iceCandidatePoolSize: ICE_CANDIDATE_POOL_SIZE, }); // set event handlers this.peerConnection.onicecandidate = this.onIceCandidate.bind(this); this.peerConnection.oniceconnectionstatechange = this.onIceConnectionStateChange.bind(this); this.peerConnection.onconnectionstatechange = this.onConnectionStateChange.bind(this); this.peerConnection.addEventListener('track', this.onTrackEventHandler.bind(this)); // set up data channels yield this.setupDataChannels(); // add transceivers this.peerConnection.addTransceiver('video', { direction: 'recvonly' }); if (this.disableInputAudio) { this.peerConnection.addTransceiver('audio', { direction: 'recvonly' }); } else { this.peerConnection.addTransceiver('audio', { direction: 'sendrecv' }); // Handle audio setup after transceivers are configured if (this.inputAudioStream) { // User provided an audio stream, set it up immediately yield this.setupAudioTrack(); } else { // No user stream, start microphone permission request asynchronously // Don't await - let it run in parallel with connection setup this.requestMicrophonePermissionAsync().catch((error) => { console.error('Async microphone permission request failed:', error); }); } } }); } onSignalMessage(signalMessage) { return __awaiter(this, void 0, void 0, function* () { if (!this.peerConnection) { console.error('StreamingClient - onSignalMessage: peerConnection is not initialized'); return; } switch (signalMessage.actionType) { case types_1.SignalMessageAction.ANSWER: const answer = signalMessage.payload; yield this.peerConnection.setRemoteDescription(answer); this.connectionReceivedAnswer = true; // flush the remote buffer this.flushRemoteIceCandidateBuffer(); break; case types_1.SignalMessageAction.ICE_CANDIDATE: const iceCandidateConfig = signalMessage.payload; const candidate = new RTCIceCandidate(iceCandidateConfig); if (this.connectionReceivedAnswer) { yield this.peerConnection.addIceCandidate(candidate); } else { this.remoteIceCandidateBuffer.push(candidate); } break; 
case types_1.SignalMessageAction.END_SESSION: const reason = signalMessage.payload; this.publicEventEmitter.emit(types_1.AnamEvent.CONNECTION_CLOSED, types_1.ConnectionClosedCode.SERVER_CLOSED_CONNECTION, reason); // close the peer connection this.shutdown(); break; case types_1.SignalMessageAction.WARNING: const message = signalMessage.payload; console.warn('Warning received from server: ' + message); this.publicEventEmitter.emit(types_1.AnamEvent.SERVER_WARNING, message); break; case types_1.SignalMessageAction.TALK_STREAM_INTERRUPTED: const chatMessage = signalMessage.payload; this.publicEventEmitter.emit(types_1.AnamEvent.TALK_STREAM_INTERRUPTED, chatMessage.correlationId); break; case types_1.SignalMessageAction.SESSION_READY: const sessionId = signalMessage.sessionId; this.publicEventEmitter.emit(types_1.AnamEvent.SESSION_READY, sessionId); break; case types_1.SignalMessageAction.HEARTBEAT: break; default: console.warn('StreamingClient - onSignalMessage: unknown signal message action type. Is your anam-sdk version up to date?', signalMessage); } }); } onSignallingClientConnected() { return __awaiter(this, void 0, void 0, function* () { if (!this.peerConnection) { try { yield this.initPeerConnectionAndSendOffer(); } catch (err) { console.error('StreamingClient - onSignallingClientConnected: Error initializing peer connection', err); this.handleWebrtcFailure(err); } } }); } flushRemoteIceCandidateBuffer() { this.remoteIceCandidateBuffer.forEach((candidate) => { var _a; (_a = this.peerConnection) === null || _a === void 0 ? void 0 : _a.addIceCandidate(candidate); }); this.remoteIceCandidateBuffer = []; } /** * ICE Candidate Trickle * As each ICE candidate is gathered from the STUN server it is sent to the * webRTC server immediately in an effort to reduce time to connection. 
*/ onIceCandidate(event) { if (event.candidate) { this.signallingClient.sendIceCandidate(event.candidate); } } onIceConnectionStateChange() { var _a, _b; if (((_a = this.peerConnection) === null || _a === void 0 ? void 0 : _a.iceConnectionState) === 'connected' || ((_b = this.peerConnection) === null || _b === void 0 ? void 0 : _b.iceConnectionState) === 'completed') { this.publicEventEmitter.emit(types_1.AnamEvent.CONNECTION_ESTABLISHED); // Start collecting stats every 5 seconds this.startStatsCollection(); } } onConnectionStateChange() { var _a; if (((_a = this.peerConnection) === null || _a === void 0 ? void 0 : _a.connectionState) === 'closed') { console.error('StreamingClient - onConnectionStateChange: Connection closed'); this.handleWebrtcFailure('The connection to our servers was lost. Please try again.'); } } handleWebrtcFailure(err) { console.error({ message: 'StreamingClient - handleWebrtcFailure: ', err }); if (err.name === 'NotAllowedError' && err.message === 'Permission denied') { this.publicEventEmitter.emit(types_1.AnamEvent.CONNECTION_CLOSED, types_1.ConnectionClosedCode.MICROPHONE_PERMISSION_DENIED); } else { this.publicEventEmitter.emit(types_1.AnamEvent.CONNECTION_CLOSED, types_1.ConnectionClosedCode.WEBRTC_FAILURE); } try { this.stopConnection(); } catch (error) { console.error('StreamingClient - handleWebrtcFailure: error stopping connection', error); } } onTrackEventHandler(event) { if (event.track.kind === 'video') { // start polling stats to detect successful video data received this.startSuccessMetricPolling(); this.videoStream = event.streams[0]; this.publicEventEmitter.emit(types_1.AnamEvent.VIDEO_STREAM_STARTED, this.videoStream); if (this.videoElement) { this.videoElement.srcObject = this.videoStream; const handle = this.videoElement.requestVideoFrameCallback(() => { var _a; // unregister the callback after the first frame (_a = this.videoElement) === null || _a === void 0 ? 
void 0 : _a.cancelVideoFrameCallback(handle); this.publicEventEmitter.emit(types_1.AnamEvent.VIDEO_PLAY_STARTED); if (!this.successMetricFired) { this.successMetricFired = true; (0, ClientMetrics_1.sendClientMetric)(ClientMetrics_1.ClientMetricMeasurement.CLIENT_METRIC_MEASUREMENT_SESSION_SUCCESS, '1', { detectionMethod: 'videoElement' }); } }); } } else if (event.track.kind === 'audio') { this.audioStream = event.streams[0]; this.publicEventEmitter.emit(types_1.AnamEvent.AUDIO_STREAM_STARTED, this.audioStream); } } /** * Set up the data channels for sending and receiving messages */ setupDataChannels() { return __awaiter(this, void 0, void 0, function* () { if (!this.peerConnection) { console.error('StreamingClient - setupDataChannels: peer connection is not initialized'); return; } /** * Audio - Validate user-provided stream only * * If the user provided an audio stream, validate it has audio tracks * Microphone permission request will be handled asynchronously */ if (!this.disableInputAudio && this.inputAudioStream) { // verify the user provided stream has audio tracks if (!this.inputAudioStream.getAudioTracks().length) { throw new Error('StreamingClient - setupDataChannels: user provided stream does not have audio tracks'); } } /** * Text * * Create the data channel for sending and receiving text. * There is no input stream for text, instead the sending of data is triggered by a UI interaction. */ const dataChannel = this.peerConnection.createDataChannel('session', { ordered: true, }); dataChannel.onopen = () => { this.dataChannel = dataChannel !== null && dataChannel !== void 0 ? 
dataChannel : null; }; dataChannel.onclose = () => { }; // pass text message to the message history client dataChannel.onmessage = (event) => { try { const message = JSON.parse(event.data); // Handle known message types switch (message.messageType) { case types_1.DataChannelMessage.SPEECH_TEXT: this.internalEventEmitter.emit(types_1.InternalEvent.WEBRTC_CHAT_MESSAGE_RECEIVED, message.data); break; case types_1.DataChannelMessage.CLIENT_TOOL_EVENT: const webRtcToolEvent = message.data; this.internalEventEmitter.emit(types_1.InternalEvent.WEBRTC_CLIENT_TOOL_EVENT_RECEIVED, webRtcToolEvent); const clientToolEvent = modules_1.ToolCallManager.WebRTCClientToolEventToClientToolEvent(webRtcToolEvent); this.publicEventEmitter.emit(types_1.AnamEvent.CLIENT_TOOL_EVENT_RECEIVED, clientToolEvent); break; // Unknown message types are silently ignored to maintain forward compatibility default: break; } } catch (error) { console.error('Failed to parse data channel message:', error); } }; }); } /** * Request microphone permission asynchronously without blocking connection */ requestMicrophonePermissionAsync() { return __awaiter(this, void 0, void 0, function* () { if (this.inputAudioState.permissionState === types_1.AudioPermissionState.PENDING) { return; // Already requesting } this.inputAudioState = Object.assign(Object.assign({}, this.inputAudioState), { permissionState: types_1.AudioPermissionState.PENDING }); this.publicEventEmitter.emit(types_1.AnamEvent.MIC_PERMISSION_PENDING); try { const audioConstraints = { echoCancellation: true, }; // If an audio device ID is provided in the options, use it if (this.audioDeviceId) { audioConstraints.deviceId = { exact: this.audioDeviceId, }; } this.inputAudioStream = yield navigator.mediaDevices.getUserMedia({ audio: audioConstraints, }); this.inputAudioState = Object.assign(Object.assign({}, this.inputAudioState), { permissionState: types_1.AudioPermissionState.GRANTED }); 
this.publicEventEmitter.emit(types_1.AnamEvent.MIC_PERMISSION_GRANTED); // Now add the audio track to the existing connection yield this.setupAudioTrack(); } catch (error) { console.error('Failed to get microphone permission:', error); this.inputAudioState = Object.assign(Object.assign({}, this.inputAudioState), { permissionState: types_1.AudioPermissionState.DENIED }); const errorMessage = error instanceof Error ? error.message : String(error); this.publicEventEmitter.emit(types_1.AnamEvent.MIC_PERMISSION_DENIED, errorMessage); } }); } /** * Set up audio track and add it to the peer connection using replaceTrack */ setupAudioTrack() { return __awaiter(this, void 0, void 0, function* () { if (!this.peerConnection || !this.inputAudioStream) { return; } // verify the stream has audio tracks if (!this.inputAudioStream.getAudioTracks().length) { console.error('StreamingClient - setupAudioTrack: stream does not have audio tracks'); return; } // mute the audio tracks if the user has muted the microphone if (this.inputAudioState.isMuted) { this.muteAllAudioTracks(); } const audioTrack = this.inputAudioStream.getAudioTracks()[0]; // Find the audio sender const existingSenders = this.peerConnection.getSenders(); const audioSender = existingSenders.find((sender) => { var _a; return ((_a = sender.track) === null || _a === void 0 ? 
void 0 : _a.kind) === 'audio' || (sender.track === null && sender.dtmf !== null); }); if (audioSender) { // Replace existing track (or null track) with our audio track try { yield audioSender.replaceTrack(audioTrack); } catch (error) { console.error('Failed to replace audio track:', error); // Fallback: add track normally this.peerConnection.addTrack(audioTrack, this.inputAudioStream); } } else { // No audio sender found, add track normally this.peerConnection.addTrack(audioTrack, this.inputAudioStream); } // pass the stream to the callback this.publicEventEmitter.emit(types_1.AnamEvent.INPUT_AUDIO_STREAM_STARTED, this.inputAudioStream); }); } initPeerConnectionAndSendOffer() { return __awaiter(this, void 0, void 0, function* () { yield this.initPeerConnection(); if (!this.peerConnection) { console.error('StreamingClient - initPeerConnectionAndSendOffer: peer connection is not initialized'); return; } // create offer and set local description try { const offer = yield this.peerConnection.createOffer(); yield this.peerConnection.setLocalDescription(offer); } catch (error) { console.error('StreamingClient - initPeerConnectionAndSendOffer: error creating offer', error); } if (!this.peerConnection.localDescription) { throw new Error('StreamingClient - initPeerConnectionAndSendOffer: local description is null'); } yield this.signallingClient.sendOffer(this.peerConnection.localDescription); }); } shutdown() { return __awaiter(this, void 0, void 0, function* () { var _a; if (this.showPeerConnectionStatsReport) { const stats = yield ((_a = this.peerConnection) === null || _a === void 0 ? 
void 0 : _a.getStats()); if (stats) { const report = (0, ClientMetrics_1.createRTCStatsReport)(stats, this.peerConnectionStatsReportOutputFormat); if (report) { console.log(report, undefined, 2); } } } // stop stats collection if (this.statsCollectionInterval) { clearInterval(this.statsCollectionInterval); this.statsCollectionInterval = null; } // reset video frame polling if (this.successMetricPoller) { clearInterval(this.successMetricPoller); this.successMetricPoller = null; } this.successMetricFired = false; // stop the input audio stream try { if (this.inputAudioStream) { this.inputAudioStream.getTracks().forEach((track) => { track.stop(); }); } this.inputAudioStream = null; } catch (error) { console.error('StreamingClient - shutdown: error stopping input audio stream', error); } // stop the signalling client try { this.signallingClient.stop(); } catch (error) { console.error('StreamingClient - shutdown: error stopping signallilng', error); } // close the peer connection try { if (this.peerConnection && this.peerConnection.connectionState !== 'closed') { this.peerConnection.onconnectionstatechange = null; this.peerConnection.close(); this.peerConnection = null; } } catch (error) { console.error('StreamingClient - shutdown: error closing peer connection', error); } }); } } exports.StreamingClient = StreamingClient; //# sourceMappingURL=StreamingClient.js.map