vocal-call-sdk

A JavaScript SDK that provides a complete voice calling interface with WebSocket communication, audio recording/playback, and automatic UI management.

1,238 lines (1,034 loc) • 43.2 kB
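A minimal usage sketch before the full source below (the bare import path assumes the package's default entry resolves to this file; the #call-button selector and the upload URL are illustrative, not part of the package):

import VocalCallSDK from 'vocal-call-sdk';

const sdk = new VocalCallSDK({
  agentId: 'your-agent-id',
  callId: 'unique-call-id',
  container: '#call-button', // illustrative DOM selector
  config: {
    // Note: the trailing ...config spread in the constructor replaces whole
    // sub-objects, so restate the default websocket URL when overriding endpoints.
    endpoints: {
      websocket: 'wss://call.vocallabs.ai/ws/',
      upload: 'https://example.com/recordings/presign' // illustrative n8n endpoint
    }
  }
});

sdk.renderButton();
sdk.on('onCallStart', () => console.log('call started'));
sdk.on('onUploadSuccess', ({ downloadUrl }) => console.log('recording uploaded:', downloadUrl));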
const USER_INPUT_SAMPLE_RATE = 32000;  // Default user input sample rate
const AGENT_OUTPUT_SAMPLE_RATE = 24000; // Default agent output sample rate

/**
 * Upload call recording to R2 storage via n8n endpoint
 */
async function uploadCallRecording(recordingBlob, callId, uploadEndpoint) {
  try {
    const presignedResponse = await fetch(uploadEndpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ call_id: callId })
    });
    if (!presignedResponse.ok) {
      throw new Error(`Failed to get presigned URL: ${presignedResponse.status} ${presignedResponse.statusText}`);
    }
    const presignedData = await presignedResponse.json();
    if (!presignedData.success || !presignedData.presigned_url) {
      throw new Error('Invalid presigned URL response: ' + JSON.stringify(presignedData));
    }
    const { presigned_url: presignedUrl, download_url: downloadUrl, file_key: fileKey } = presignedData;

    // Upload the recording to the presigned URL
    const uploadResponse = await fetch(presignedUrl, {
      method: 'PUT',
      headers: { 'Content-Type': 'audio/wav' },
      body: recordingBlob
    });
    if (!uploadResponse.ok) {
      throw new Error(`Failed to upload recording: ${uploadResponse.status} ${uploadResponse.statusText}`);
    }
    console.log('[UPLOAD] Download URL:', downloadUrl);
    return { success: true, downloadUrl: downloadUrl, fileKey: fileKey, uploadSize: recordingBlob.size };
  } catch (error) {
    console.error('[UPLOAD] Error uploading recording:', error);
    return { success: false, error: error.message };
  }
}

class DualStreamAudioProcessor {
  constructor(sampleRate = AGENT_OUTPUT_SAMPLE_RATE, micInputSample = USER_INPUT_SAMPLE_RATE) {
    this.sampleRate = sampleRate;
    this.micInputSampleRate = micInputSample; // Track original mic rate
    this.targetSampleRate = sampleRate;
    this.micRecordingData = [];   // User voice only
    this.agentRecordingData = []; // Agent voice only
    this.isCallRecording = false;
    this.callRecordingSampleRate = sampleRate;
    // Anti-jitter buffer system for agent audio
    this.agentRecordingBuffer = []; // Continuous buffer for agent audio
    this.agentBufferSampleRate = sampleRate;
    this.lastAgentBufferTime = 0;
    this.minBufferChunks = 3;
    this.isBuffering = false;
  }

  startRecording() {
    this.micRecordingData = [];
    this.agentRecordingData = [];
    this.agentRecordingBuffer = [];
    this.isCallRecording = true;
    this.lastAgentBufferTime = 0;
    console.log('[DUAL_STREAM] Started dual-stream recording');
  }

  stopRecording() {
    this.isCallRecording = false;
    // Convert agent buffer to final recording data
    this._finalizeAgentRecording();
    if (this.micRecordingData.length > 0 || this.agentRecordingData.length > 0) {
      // Combine both streams properly at same sample rate
      const maxLength = Math.max(this.micRecordingData.length, this.agentRecordingData.length);
      const combinedData = new Float32Array(maxLength);
      // Mix both audio streams (matches original mixing logic)
      for (let i = 0; i < maxLength; i++) {
        const micSample = i < this.micRecordingData.length ? this.micRecordingData[i] : 0;
        const agentSample = i < this.agentRecordingData.length ? this.agentRecordingData[i] : 0;
        // Simple mixing - slightly favor user voice (matches original)
        combinedData[i] = (micSample + agentSample * 0.8) / 1.8;
      }
      console.log(`[DUAL_STREAM] Created combined recording: ${combinedData.length} samples`);
      const wavBlob = this.createWavBlob(combinedData, this.callRecordingSampleRate);
      return wavBlob;
    }
    console.log('[DUAL_STREAM] No recording data available');
    return null;
  }
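  // Example (illustrative arithmetic for the mixer above): with full-scale samples
  // on both streams, mic = 1.0 and agent = 1.0, combined = (1.0 + 0.8 * 1.0) / 1.8 = 1.0,
  // so the mix can never leave [-1, 1] while the user's voice is weighted slightly
  // above the agent's.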
  // Add microphone audio continuously (matches original)
  addMicRecordingData(audioData) {
    if (this.isCallRecording && audioData) {
      // RESAMPLE mic data to target recording rate before storing
      let resampledMicData = audioData;
      if (this.micInputSampleRate !== this.targetSampleRate) {
        resampledMicData = this._resampleAudio(audioData, this.micInputSampleRate, this.targetSampleRate);
      }
      this.micRecordingData.push(...resampledMicData);
      // Sync agent recording length to match microphone
      this._syncAgentRecordingLength();
    }
  }

  // Add agent audio to continuous buffer (anti-jitter) (matches original)
  addAgentRecordingData(audioData, sampleRate) {
    if (!this.isCallRecording || !audioData) return;
    // Resample to target sample rate if needed
    let resampledData = audioData;
    if (sampleRate !== this.agentBufferSampleRate) {
      resampledData = this._resampleAudio(audioData, sampleRate, this.agentBufferSampleRate);
    }
    // Add to continuous buffer
    this.agentRecordingBuffer.push(...resampledData);
  }

  // Sync agent recording length to match microphone (eliminates jitter) (matches original)
  _syncAgentRecordingLength() {
    const targetLength = this.micRecordingData.length;
    if (this.agentRecordingData.length < targetLength) {
      // Need to add more agent data
      const needed = targetLength - this.agentRecordingData.length;
      const available = this.agentRecordingBuffer.length - this.lastAgentBufferTime;
      if (available > 0) {
        const toTake = Math.min(needed, available);
        const startIndex = this.lastAgentBufferTime;
        const endIndex = startIndex + toTake;
        // Extract audio from buffer
        const agentSlice = this.agentRecordingBuffer.slice(startIndex, endIndex);
        // Ensure we have enough data
        if (agentSlice.length < needed) {
          // Pad with silence if not enough agent audio
          const paddedSlice = new Float32Array(needed);
          paddedSlice.set(agentSlice);
          this.agentRecordingData.push(...paddedSlice);
        } else {
          this.agentRecordingData.push(...agentSlice);
        }
        this.lastAgentBufferTime = endIndex;
      } else {
        // No agent audio available, add silence
        const silence = new Float32Array(needed);
        this.agentRecordingData.push(...silence);
      }
    }
  }

  // Finalize agent recording from buffer (matches original)
  _finalizeAgentRecording() {
    // Make sure agent recording matches microphone length
    const targetLength = this.micRecordingData.length;
    if (this.agentRecordingData.length < targetLength) {
      const needed = targetLength - this.agentRecordingData.length;
      const available = this.agentRecordingBuffer.length - this.lastAgentBufferTime;
      if (available > 0) {
        const toTake = Math.min(needed, available);
        const agentSlice = this.agentRecordingBuffer.slice(this.lastAgentBufferTime, this.lastAgentBufferTime + toTake);
        this.agentRecordingData.push(...agentSlice);
      }
      // Pad with silence if still not enough
      if (this.agentRecordingData.length < targetLength) {
        const stillNeeded = targetLength - this.agentRecordingData.length;
        const silence = new Float32Array(stillNeeded);
        this.agentRecordingData.push(...silence);
      }
    }
  }

  // Simple linear resampling function (matches original)
  _resampleAudio(inputData, inputSampleRate, outputSampleRate) {
    if (inputSampleRate === outputSampleRate) {
      return inputData;
    }
    const ratio = inputSampleRate / outputSampleRate;
    const outputLength = Math.floor(inputData.length / ratio);
    const outputData = new Float32Array(outputLength);
    for (let i = 0; i < outputLength; i++) {
      const inputIndex = i * ratio;
      const lowerIndex = Math.floor(inputIndex);
      const upperIndex = Math.min(Math.ceil(inputIndex), inputData.length - 1);
      const fraction = inputIndex - lowerIndex;
      // Linear interpolation
      const lowerValue = inputData[lowerIndex] || 0;
      const upperValue = inputData[upperIndex] || 0;
      outputData[i] = lowerValue + (upperValue - lowerValue) * fraction;
    }
    return outputData;
  }
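  // Example (illustrative numbers for the resampler above): a 1024-sample chunk
  // captured at 32000 Hz and resampled to 24000 Hz gives ratio = 32000 / 24000 ≈ 1.333
  // and outputLength = floor(1024 / 1.333) = 768 samples — the same 32 ms of audio
  // at the lower rate.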
  createWavBlob(audioData, sampleRate = AGENT_OUTPUT_SAMPLE_RATE, numChannels = 1) {
    const length = audioData.length;
    const bytesPerSample = 2; // 16-bit
    const blockAlign = numChannels * bytesPerSample;
    const byteRate = sampleRate * blockAlign;
    const dataSize = length * bytesPerSample;
    const fileSize = 36 + dataSize;
    const buffer = new ArrayBuffer(44 + dataSize);
    const view = new DataView(buffer);

    // Helper function to write strings
    const writeString = (offset, string) => {
      for (let i = 0; i < string.length; i++) {
        view.setUint8(offset + i, string.charCodeAt(i));
      }
    };

    // WAV header with proper values
    writeString(0, 'RIFF');                // ChunkID
    view.setUint32(4, fileSize, true);     // ChunkSize (file size - 8)
    writeString(8, 'WAVE');                // Format
    writeString(12, 'fmt ');               // Subchunk1ID
    view.setUint32(16, 16, true);          // Subchunk1Size (PCM = 16)
    view.setUint16(20, 1, true);           // AudioFormat (PCM = 1)
    view.setUint16(22, numChannels, true); // NumChannels
    view.setUint32(24, sampleRate, true);  // SampleRate
    view.setUint32(28, byteRate, true);    // ByteRate
    view.setUint16(32, blockAlign, true);  // BlockAlign
    view.setUint16(34, 16, true);          // BitsPerSample
    writeString(36, 'data');               // Subchunk2ID
    view.setUint32(40, dataSize, true);    // Subchunk2Size

    // Better sample conversion for recordings
    let offset = 44;
    let maxSample = 0;
    // First pass: find the maximum sample for intelligent normalization
    for (let i = 0; i < length; i++) {
      maxSample = Math.max(maxSample, Math.abs(audioData[i]));
    }
    // Calculate normalization factor (prevent over-amplification of noise)
    const normalizationFactor = maxSample > 0.05 ? Math.min(0.95 / maxSample, 4.0) : 1.0;
    // Second pass: convert to 16-bit with intelligent normalization
    for (let i = 0; i < length; i++) {
      let sample = audioData[i] * normalizationFactor;
      // Gentle limiting to prevent harsh clipping in recordings
      sample = Math.max(-0.98, Math.min(0.98, sample));
      // Convert to 16-bit integer with proper scaling
      const intSample = Math.round(sample * 32767);
      view.setInt16(offset, intSample, true);
      offset += 2;
    }
    return new Blob([buffer], { type: 'audio/wav' });
  }
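  // Example (illustrative sizing for createWavBlob above): one minute of mono
  // 24000 Hz audio is 1,440,000 samples, so dataSize = 1,440,000 * 2 = 2,880,000
  // bytes and the blob is 2,880,044 bytes (44-byte header + 16-bit PCM data),
  // with ChunkSize = 2,880,036.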
  getRecordingStats() {
    return {
      micSamples: this.micRecordingData.length,
      agentSamples: this.agentRecordingData.length,
      agentBufferSamples: this.agentRecordingBuffer.length,
      isRecording: this.isCallRecording
    };
  }
}

export class VocalCallSDK {
  constructor({
    agentId,
    callId,
    inactiveText = 'Talk to Assistant',
    activeText = 'Listening...',
    size = 'medium',
    className = '',
    container = null,
    config = {}
  }) {
    // Core configuration
    this.agentId = agentId;
    this.callId = callId;
    this.inactiveText = inactiveText;
    this.activeText = activeText;
    this.size = size;
    this.className = className;
    this.container = container;

    // Default endpoints and settings with recording options
    this.config = {
      endpoints: {
        websocket: 'wss://call.vocallabs.ai/ws/',
        upload: config.endpoints?.upload
      },
      audio: {
        echoCancellation: true,
        noiseSuppression: true,
        // ADD THESE FOR TESTING:
        userInputSampleRate: config.audio?.userInputSampleRate || USER_INPUT_SAMPLE_RATE,
        agentOutputSampleRate: config.audio?.agentOutputSampleRate || AGENT_OUTPUT_SAMPLE_RATE
      },
      recording: {
        enabled: true,
        autoUpload: true,
        uploadOnCallEnd: true
      },
      ...config
    };

    // Internal state
    this.status = 'idle';
    this.isRecording = false;
    this.lastDisconnectReason = null;
    this.recordingBlob = null; // Store recording blob to prevent double-stop issues

    // Core components
    this.wsClient = null;
    this.audioService = null;
    this.audioProcessor = null; // NEW: Enhanced dual-stream processor
    this.buttonElement = null;
    this.statusElement = null;

    // Timing and state tracking
    this.startTime = null;
    this.endTime = null;
    this.connectionTimer = null;

    // Event callbacks - EXPANDED with upload events
    this.eventCallbacks = {
      onCallStart: [],
      onCallEnd: [],
      onRecordingStart: [],
      onRecordingStop: [],
      onStatusChange: [],
      onError: [],
      onUploadStart: [],
      onUploadSuccess: [],
      onUploadError: []
    };

    // Initialize the SDK
    this._initializeComponents();
  }
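  /*
   * Events emitted via _triggerEvent below (subscribe with sdk.on(name, cb)):
   *   onCallStart()                - WebSocket opened and the call started
   *   onCallEnd(disconnectSource)  - client-side disconnect completed
   *   onRecordingStart() / onRecordingStop()
   *   onStatusChange(status)       - same shape as getStatus()
   *   onError(error)
   *   onUploadStart({ callId, size, stats })
   *   onUploadSuccess({ callId, downloadUrl, fileKey, uploadSize, disconnectSource, stats })
   *   onUploadError({ callId, error, disconnectSource, stats? })  // stats when available
   */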
  /* ------------------------------------------------------------------
   * Public API Methods
   * ----------------------------------------------------------------*/
  on(event, callback) {
    if (this.eventCallbacks[event]) {
      this.eventCallbacks[event].push(callback);
    }
    return this;
  }

  off(event, callback) {
    if (this.eventCallbacks[event]) {
      const index = this.eventCallbacks[event].indexOf(callback);
      if (index > -1) {
        this.eventCallbacks[event].splice(index, 1);
      }
    }
    return this;
  }

  async startCall() {
    return this._handleButtonClick();
  }

  async endCall() {
    if (this.isRecording) {
      return this._handleButtonClick();
    }
  }

  getStatus() {
    return {
      status: this.status,
      isRecording: this.isRecording,
      isConnected: this.wsClient?.isConnected || false,
      lastDisconnectReason: this.lastDisconnectReason,
      recordingStats: this.audioProcessor?.getRecordingStats() || null
    };
  }

  renderButton(containerElement = null) {
    const target = containerElement || this.container;
    if (!target) {
      console.error('[VocalCallSDK] No container specified for button rendering');
      return;
    }
    const container = typeof target === 'string' ? document.querySelector(target) : target;
    if (!container) {
      console.error('[VocalCallSDK] Container element not found');
      return;
    }
    this._createButtonUI(container);
    return this;
  }

  destroy() {
    this._cleanup();
    if (this.buttonElement) {
      this.buttonElement.remove();
    }
    if (this.statusElement) {
      this.statusElement.remove();
    }
  }

  /* ------------------------------------------------------------------
   * Core Audio and WebSocket Classes (Internal)
   * ----------------------------------------------------------------*/
  _initializeComponents() {
    this._initializeWebSocketClient();
    this._initializeAudioService();
    // NEW: Initialize dual-stream audio processor for recording
    if (this.config.recording.enabled) {
      this.audioProcessor = new DualStreamAudioProcessor(this.config.audio.agentOutputSampleRate, this.config.audio.userInputSampleRate);
      this.audioProcessor.micInputSampleRate = this.config.audio.userInputSampleRate;
    }
  }

  _initializeWebSocketClient() {
    this.wsClient = {
      socket: null,
      isConnected: false,
      manualDisconnect: false,
      streamId: null,
      disconnectSource: null,

      connect: () => {
        if (this.wsClient.socket) {
          try { this.wsClient.socket.close(); } catch {}
        }
        const url = `${this.config.endpoints.websocket}?agent=${this.agentId.trim()}_${this.callId.trim()}_web_${this.config.audio.userInputSampleRate}`;
        this.wsClient.socket = new WebSocket(url);
        console.log('[WS] Connecting to', url);
        this.wsClient.socket.onopen = (e) => this._handleWSOpen(e);
        this.wsClient.socket.onmessage = (e) => this._handleWSMessage(e);
        this.wsClient.socket.onclose = (e) => this._handleWSClose(e);
        this.wsClient.socket.onerror = (e) => this._handleWSError(e);
      },

      disconnect: async (source = 'manual') => {
        if (!this.wsClient.socket) return;
        this.wsClient.disconnectSource = source;
        this.wsClient.manualDisconnect = true;
        // NEW: Handle recording upload before disconnecting
        await this._handleRecordingUpload(source);
        try {
          if (this.wsClient.socket.readyState === WebSocket.OPEN) {
            this.wsClient.socket.send(JSON.stringify({ event: 'disconnect', reason: 'Client disconnected' }));
          }
          this.wsClient.socket.onopen = this.wsClient.socket.onmessage = this.wsClient.socket.onclose = this.wsClient.socket.onerror = null;
          this.wsClient.socket.close(1000, 'Client disconnected');
        } finally {
          this.wsClient.isConnected = false;
          this.wsClient.socket = null;
          this._triggerEvent('onCallEnd', this.wsClient.disconnectSource);
        }
      },

      sendAudio: (buffer) => {
        if (!this.wsClient.isConnected) return false;
        if (!this.wsClient.streamId) {
          this.wsClient.streamId = 'MZ' + Math.random().toString(36).slice(2);
        }
        const b64 = btoa(String.fromCharCode(...new Uint8Array(buffer)));
        try {
          this.wsClient.socket.send(JSON.stringify({
            event: 'media',
            media: { track: 'inbound', timestamp: Date.now().toString(), payload: b64 },
            streamId: this.wsClient.streamId
          }));
          return true;
        } catch (e) {
          console.error('[WS] Error sending audio:', e);
          return false;
        }
      },

      sendPlayedStream: () => {
        if (!this.wsClient.isConnected) return;
        try {
          this.wsClient.socket.send(JSON.stringify({ event: 'playedStream', name: 'audio' }));
        } catch {}
      }
    };
  }
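  /*
   * Frames sent to the server by the client above (shapes taken from connect,
   * sendAudio and _handleWSOpen; the field values here are illustrative):
   *   { event: 'start', start: { streamId: 'inbound', mediaFormat: { Encoding: 'audio/x-l16', sampleRate: 32000 } } }
   *   { event: 'media', media: { track: 'inbound', timestamp: '1700000000000', payload: '<base64 PCM16>' }, streamId: 'MZ...' }
   *   { event: 'playedStream', name: 'audio' }
   *   { event: 'disconnect', reason: 'Client disconnected' }
   */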
  _initializeAudioService() {
    this.audioService = {
      audioContext: null,
      stream: null,
      processor: null,
      source: null,
      isRecording: false,
      audioQueue: [],
      isPlaying: false,
      nextTime: 0,
      activeSources: new Set(),
      queueTimer: null,
      pendingCheckpoint: false,
      minBufferChunks: 3,
      isBuffering: false,

      initialize: async () => {
        this.audioService.audioContext = new (window.AudioContext || window.webkitAudioContext)({
          sampleRate: this.config.audio.userInputSampleRate
        });
        return true;
      },

      startRecording: async (sendFn) => {
        if (this.audioService.isRecording) return false;
        if (!this.audioService.audioContext) await this.audioService.initialize();
        if (this.audioService.audioContext.state === 'suspended') {
          await this.audioService.audioContext.resume();
        }
        try {
          this.audioService.stream = await navigator.mediaDevices.getUserMedia({
            audio: {
              channelCount: 1,
              sampleRate: this.config.audio.userInputSampleRate,
              echoCancellation: this.config.audio.echoCancellation,
              noiseSuppression: this.config.audio.noiseSuppression
            }
          });
          this.audioService.source = this.audioService.audioContext.createMediaStreamSource(this.audioService.stream);

          // Try to use modern AudioWorkletNode, fallback to ScriptProcessorNode
          let useAudioWorklet = false;
          try {
            if (this.audioService.audioContext.audioWorklet) {
              // Create inline AudioWorklet processor
              const processorCode = `
                class AudioProcessor extends AudioWorkletProcessor {
                  process(inputs, outputs, parameters) {
                    const input = inputs[0];
                    if (input.length > 0) {
                      const inputData = input[0];
                      this.port.postMessage(inputData);
                    }
                    return true;
                  }
                }
                registerProcessor('audio-processor', AudioProcessor);
              `;
              const processorBlob = new Blob([processorCode], { type: 'application/javascript' });
              const processorUrl = URL.createObjectURL(processorBlob);
              await this.audioService.audioContext.audioWorklet.addModule(processorUrl);
              this.audioService.processor = new AudioWorkletNode(this.audioService.audioContext, 'audio-processor');
              this.audioService.processor.port.onmessage = (event) => {
                if (!this.audioService.isRecording) return;
                const float = event.data;
                // NEW: Add microphone audio to dual-stream recorder
                if (this.config.recording.enabled && this.audioProcessor) {
                  this.audioProcessor.addMicRecordingData(float);
                }
                // WebSocket sending logic
                const int16 = new Int16Array(float.length);
                for (let i = 0; i < float.length; i++) {
                  const v = Math.max(-1, Math.min(1, float[i]));
                  int16[i] = Math.round(v * 32767);
                }
                sendFn(int16.buffer);
              };
              URL.revokeObjectURL(processorUrl);
              useAudioWorklet = true;
              console.log('[AUDIO] Using modern AudioWorkletNode');
            }
          } catch (workletError) {
            console.log('[AUDIO] AudioWorklet not supported, falling back to ScriptProcessorNode');
          }

          // Fallback to ScriptProcessorNode if AudioWorklet fails
          if (!useAudioWorklet) {
            this.audioService.processor = this.audioService.audioContext.createScriptProcessor(1024, 1, 1);
            this.audioService.processor.onaudioprocess = e => {
              if (!this.audioService.isRecording) return;
              const float = e.inputBuffer.getChannelData(0);
              // NEW: Add microphone audio to dual-stream recorder (matches original)
              if (this.config.recording.enabled && this.audioProcessor) {
                this.audioProcessor.addMicRecordingData(float);
              }
              // Original WebSocket sending logic
              const int16 = new Int16Array(float.length);
              for (let i = 0; i < float.length; i++) {
                const v = Math.max(-1, Math.min(1, float[i]));
                int16[i] = Math.round(v * 32767);
              }
              sendFn(int16.buffer);
            };
          }

          // NEW: Start dual-stream recording collection
          if (this.config.recording.enabled && this.audioProcessor) {
            this.audioProcessor.startRecording();
          }

          this.audioService.source.connect(this.audioService.processor);
          this.audioService.processor.connect(this.audioService.audioContext.destination);
          this.audioService.isRecording = true;
          this.isRecording = true;
          this._triggerEvent('onRecordingStart');
          this._updateUI();
          return true;
        } catch (error) {
          console.error('[RECORDING] Failed to start recording:', error);
          this._triggerEvent('onError', error);
          return false;
        }
      },
      stopRecording: () => {
        if (!this.audioService.isRecording) return this.recordingBlob; // Return stored blob if already stopped
        this.audioService.isRecording = false;
        this.isRecording = false;
        // NEW: Get the combined recording blob from dual-stream processor and store it
        if (this.config.recording.enabled && this.audioProcessor) {
          this.recordingBlob = this.audioProcessor.stopRecording();
          console.log('[RECORDING] Stored recording blob:', this.recordingBlob ? `${this.recordingBlob.size} bytes` : 'null');
        }
        this._triggerEvent('onRecordingStop');
        this._updateUI();
        try {
          if (this.audioService.processor) {
            if (this.audioService.processor.port) {
              // AudioWorkletNode cleanup
              this.audioService.processor.port.onmessage = null;
            }
            this.audioService.processor.disconnect();
          }
          if (this.audioService.source) {
            this.audioService.source.disconnect();
          }
          if (this.audioService.stream) {
            this.audioService.stream.getTracks().forEach(t => t.stop());
          }
        } catch (error) {
          console.error('[DISCONNECT] Error stopping recording:', error);
        }
        return this.recordingBlob;
      },

      playAudio: async (buffer, sr = this.config.audio.agentOutputSampleRate) => {
        if (!this.audioService.audioContext) await this.audioService.initialize();
        if (this.audioService.audioContext.state === 'suspended') {
          await this.audioService.audioContext.resume();
        }
        try {
          const int16 = new Int16Array(buffer);
          // Convert to float for processing
          const float = new Float32Array(int16.length);
          for (let i = 0; i < int16.length; i++) {
            float[i] = int16[i] / 32768.0;
          }
          // NEW: Add agent audio to dual-stream recorder (matches original)
          if (this.config.recording.enabled && this.audioProcessor) {
            this.audioProcessor.addAgentRecordingData(float, sr);
          }
          const processedFloat = this._processIncomingAudio(float);
          const audioBuffer = this.audioService.audioContext.createBuffer(1, processedFloat.length, sr);
          audioBuffer.getChannelData(0).set(processedFloat);
          this.audioService.audioQueue.push(audioBuffer);
          if (!this.audioService.isPlaying && this.audioService.audioQueue.length >= this.audioService.minBufferChunks) {
            this.audioService.isBuffering = false;
            this._processAudioQueue();
          }
        } catch (error) {
          console.error('[PLAYBACK] Error playing audio:', error);
        }
      },

      clearAudio: () => {
        if (this.audioService.queueTimer) {
          clearTimeout(this.audioService.queueTimer);
          this.audioService.queueTimer = null;
        }
        this.audioService.activeSources.forEach(src => {
          try { src.stop(); } catch {}
        });
        this.audioService.activeSources.clear();
        this.audioService.audioQueue = [];
        this.audioService.isPlaying = false;
        this.audioService.nextTime = this.audioService.audioContext?.currentTime || 0;
        this.audioService.pendingCheckpoint = false;
        this.audioService.isBuffering = false;
      },

      handleCheckpoint: () => {
        if (this.audioService.audioQueue.length === 0 && this.audioService.activeSources.size === 0) {
          if (this.wsClient.isConnected) this.wsClient.sendPlayedStream();
        } else {
          this.audioService.pendingCheckpoint = true;
        }
      },

      cleanup: () => {
        if (this.audioService.isRecording) this.audioService.stopRecording();
        this.audioService.clearAudio();
        if (this.audioService.audioContext && this.audioService.audioContext.state !== 'closed') {
          this.audioService.audioContext.close();
        }
      }
    };
  }

  /* ------------------------------------------------------------------
   * Enhanced Recording Upload Handler
   * ----------------------------------------------------------------*/
  async _handleRecordingUpload(disconnectSource) {
    if (!this.config.recording.enabled || !this.config.recording.uploadOnCallEnd || !this.audioProcessor) {
      return;
    }
    try {
      // Use stored recording blob instead of stopping recording again
      let recordingBlob = this.recordingBlob;
      // If we don't have a stored blob, try to get it from stopping recording
      if (!recordingBlob && this.audioService.isRecording) {
        recordingBlob = this.audioService.stopRecording();
      }
      if (!recordingBlob || recordingBlob.size === 0) {
        console.log('[UPLOAD] No recording data to upload');
        return;
      }
      console.log(`[UPLOAD] Starting upload for call ${this.callId}, size: ${recordingBlob.size} bytes`);
      const stats = this.audioProcessor.getRecordingStats();
      console.log('[UPLOAD] Recording stats:', stats);
      this._triggerEvent('onUploadStart', { callId: this.callId, size: recordingBlob.size, stats });
      const result = await uploadCallRecording(recordingBlob, this.callId, this.config.endpoints.upload);
      if (result.success) {
        console.log('[UPLOAD] Recording uploaded successfully:', result.downloadUrl);
        this._triggerEvent('onUploadSuccess', {
          callId: this.callId,
          downloadUrl: result.downloadUrl,
          fileKey: result.fileKey,
          uploadSize: result.uploadSize,
          disconnectSource,
          stats
        });
      } else {
        console.error('[UPLOAD] Recording upload failed:', result.error);
        this._triggerEvent('onUploadError', { callId: this.callId, error: result.error, disconnectSource, stats });
      }
    } catch (error) {
      console.error('[UPLOAD] Unexpected error during recording upload:', error);
      this._triggerEvent('onUploadError', { callId: this.callId, error: error.message, disconnectSource });
    }
  }

  /* ------------------------------------------------------------------
   * WebSocket Event Handlers
   * ----------------------------------------------------------------*/
  _handleWSOpen(evt) {
    this.wsClient.isConnected = true;
    this.wsClient.manualDisconnect = false;
    this.wsClient.disconnectSource = null;
    // Clear any previous recording blob
    this.recordingBlob = null;
    this.wsClient.socket.send(JSON.stringify({
      event: 'start',
      start: {
        streamId: 'inbound',
        mediaFormat: { Encoding: 'audio/x-l16', sampleRate: this.config.audio.userInputSampleRate }
      }
    }));
    clearTimeout(this.connectionTimer);
    this._setStatus('connected');
    this.startTime = new Date();
    this.endTime = null;
    this._startRecording();
    this._triggerEvent('onCallStart');
  }

  async _handleWSMessage(evt) {
    let msg;
    try {
      msg = JSON.parse(evt.data);
    } catch (e) {
      return;
    }
    if (msg.event === 'hangup') {
      this.endTime = new Date();
      this._stopEverything('agent');
      if (this.wsClient.isConnected) {
        await this.wsClient.disconnect('agent');
      }
      this._setStatus('idle');
      this.lastDisconnectReason = 'agent';
    } else if (msg.event === 'media' && msg.media?.payload) {
      // Handle server audio (agent voice)
      if (msg.media.track === 'outbound' || !msg.media.track) {
        try {
          const bin = Uint8Array.from(atob(msg.media.payload), c => c.charCodeAt(0)).buffer;
          console.log(`[DEBUG] Received agent audio: ${bin.byteLength} bytes, expecting ${this.config.audio.agentOutputSampleRate}Hz`);
          await this.audioService.playAudio(bin, this.config.audio.agentOutputSampleRate); // This will record agent audio
        } catch (error) {
          console.error('[PLAYBACK] Error processing server audio:', error);
        }
      }
    } else if (msg.event === 'playAudio' && msg.media?.payload) {
      // Backward compatibility for agent audio
      try {
        const bin = Uint8Array.from(atob(msg.media.payload), c => c.charCodeAt(0)).buffer;
        await this.audioService.playAudio(bin, this.config.audio.agentOutputSampleRate); // This will record agent audio
      } catch (error) {
        console.error('[PLAYBACK] Error processing agent audio:', error);
      }
    } else if (msg.event === 'clearAudio') {
      this.audioService.clearAudio();
    } else if (msg.event === 'checkpoint') {
      this.audioService.handleCheckpoint();
    } else if (msg.event === 'error') {
      this._stopEverything('error_event');
      this._setStatus('error');
    }
  }
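  /*
   * Inbound events handled above: 'hangup' (agent ended the call), 'media' and
   * 'playAudio' (base64 PCM16 agent audio to play and record), 'clearAudio'
   * (flush the playback queue), 'checkpoint' (server asks for a 'playedStream'
   * ack once queued audio finishes), and 'error'.
   */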
  async _handleWSClose(evt) {
    if (!this.wsClient.isConnected) return;
    this.wsClient.isConnected = false;
    const source = this.wsClient.manualDisconnect
      ? this.wsClient.disconnectSource || 'manual_disconnect'
      : 'server_initiated';
    // NEW: Handle recording upload before cleanup
    await this._handleRecordingUpload(source);
    this._stopEverything(`ws_disconnected_${source}`);
    this._setStatus('idle');
    this.lastDisconnectReason = source;
  }

  _handleWSError(err) {
    this.wsClient.isConnected = false;
    this._setStatus('error');
    this._triggerEvent('onError', err);
  }

  /* ------------------------------------------------------------------
   * Audio Processing Methods (Enhanced with buffering)
   * ----------------------------------------------------------------*/
  _processIncomingAudio(audioData) {
    const processed = new Float32Array(audioData.length);
    let maxSample = 0;
    for (let i = 0; i < audioData.length; i++) {
      maxSample = Math.max(maxSample, Math.abs(audioData[i]));
    }
    const normalizationFactor = maxSample > 0.1 ? Math.min(0.85 / maxSample, 2.5) : 1.8;
    for (let i = 0; i < audioData.length; i++) {
      let sample = audioData[i];
      if (Math.abs(sample) < 0.002) {
        sample = 0;
      }
      sample *= normalizationFactor;
      if (sample > 0.9) sample = 0.9 + (sample - 0.9) * 0.1;
      if (sample < -0.9) sample = -0.9 + (sample + 0.9) * 0.1;
      processed[i] = sample;
    }
    return processed;
  }
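  // Example (illustrative numbers for _processIncomingAudio above): for a chunk
  // whose loudest sample is 0.4, normalizationFactor = min(0.85 / 0.4, 2.5) = 2.125,
  // so that peak plays back at 0.85; a sample that would land at 1.0 after scaling
  // is softened by the knee to 0.9 + (1.0 - 0.9) * 0.1 = 0.91.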
  _processAudioQueue() {
    // if (this.audioService.audioQueue.length === 0) {
    //   this.audioService.isPlaying = false;
    //   this.audioService.isBuffering = true;
    //   this._maybeAck();
    //   return;
    // }
    if (this.audioService.audioQueue.length === 0) {
      this.audioService.isPlaying = false;
      this._maybeAck();
      return;
    }
    this.audioService.isPlaying = true;
    const now = this.audioService.audioContext.currentTime;
    this.audioService.nextTime = Math.max(this.audioService.nextTime, now + 0.005);
    const buffer = this.audioService.audioQueue.shift();
    const source = this.audioService.audioContext.createBufferSource();
    source.buffer = buffer;
    const playbackGain = this.audioService.audioContext.createGain();
    playbackGain.gain.value = 1.2;
    const filter = this.audioService.audioContext.createBiquadFilter();
    filter.type = 'lowpass';
    filter.frequency.value = 8000;
    filter.Q.value = 0.5;
    source.connect(filter);
    filter.connect(playbackGain);
    playbackGain.connect(this.audioService.audioContext.destination);
    source.start(this.audioService.nextTime);
    this.audioService.activeSources.add(source);
    source.onended = () => {
      this.audioService.activeSources.delete(source);
      this._maybeAck();
    };
    this.audioService.nextTime += buffer.duration;
    // Enhanced buffering logic (matches original)
    // if (this.audioService.audioQueue.length >= this.audioService.minBufferChunks) {
    //   this.audioService.queueTimer = setTimeout(() => this._processAudioQueue(), 1);
    // } else if (this.audioService.audioQueue.length === 0) {
    //   this.audioService.isPlaying = false;
    //   this.audioService.isBuffering = true;
    //   this._maybeAck();
    // } else {
    //   this.audioService.isPlaying = false;
    //   this.audioService.isBuffering = true;
    // }
    this.audioService.queueTimer = setTimeout(() => this._processAudioQueue(), 3);
  }

  _maybeAck() {
    if (this.audioService.pendingCheckpoint && this.audioService.audioQueue.length <= 1) {
      this.audioService.pendingCheckpoint = false;
      if (this.wsClient.isConnected) this.wsClient.sendPlayedStream();
    }
  }

  async _startRecording() {
    if (!this.wsClient.isConnected) {
      this._setStatus('error');
      return;
    }
    const ok = await this.audioService.startRecording(buf => {
      if (this.wsClient.isConnected) {
        return this.wsClient.sendAudio(buf);
      }
      return false;
    });
    if (!ok) {
      console.error('[RECORDING] Failed to start recording');
      this._setStatus('error');
    }
  }

  _stopEverything(reason = 'manual') {
    if (this.audioService.isRecording) {
      this.audioService.stopRecording();
    }
    this.audioService.clearAudio();
  }

  async _handleButtonClick() {
    if (!this.agentId || !this.callId) {
      this._setStatus('error');
      return;
    }
    await this.audioService.audioContext?.resume().catch(() => {});
    if (this.isRecording) {
      if (!this.endTime) {
        this.endTime = new Date();
      }
      this._stopEverything('user_clicked_stop');
      await this.wsClient.disconnect('user');
      this._setStatus('idle');
      this.lastDisconnectReason = 'user';
      return;
    }
    if (this.status === 'idle' || this.status === 'error') {
      this._setStatus('connecting');
      this.wsClient.manualDisconnect = false;
      this.wsClient.connect();
      this.connectionTimer = setTimeout(() => {
        if (!this.wsClient.isConnected) {
          this._setStatus('error');
          if (this.wsClient.socket) {
            this.wsClient.manualDisconnect = true;
            this.wsClient.disconnect('connection_timeout');
          }
        }
      }, 8000);
      return;
    }
    if (this.status === 'connected' && !this.isRecording) {
      this._startRecording();
    }
  }

  _createButtonUI(container) {
    const wrapper = document.createElement('div');
    wrapper.className = 'vocal-call-wrapper inline-flex flex-col items-center gap-1';
    this.buttonElement = document.createElement('button');
    this.buttonElement.className = this._getButtonClasses();
    this.buttonElement.setAttribute('aria-label', this.isRecording ? 'Stop recording' : 'Start recording');
    this.buttonElement.addEventListener('click', () => this._handleButtonClick());
    this.statusElement = document.createElement('div');
    this.statusElement.className = 'text-xs font-medium';
    wrapper.appendChild(this.buttonElement);
    wrapper.appendChild(this.statusElement);
    container.appendChild(wrapper);
    this._updateUI();
  }

  _getButtonClasses() {
    const baseClasses = 'relative font-medium shadow transition text-white';
    const sizeClasses = {
      'small': 'px-3 py-1 text-sm rounded-md',
      'medium': 'px-4 py-2 text-base rounded-lg',
      'large': 'px-6 py-3 text-lg rounded-xl'
    };
    const statusClasses = {
      'connecting': 'bg-amber-500 cursor-not-allowed opacity-50',
      'error': 'bg-rose-600 cursor-not-allowed opacity-50',
      'connected': this.isRecording ? 'bg-red-600 hover:brightness-110' : 'bg-green-600 hover:brightness-110',
      'idle': 'bg-slate-700/80 hover:brightness-110'
    };
    return [
      baseClasses,
      sizeClasses[this.size] || sizeClasses.medium,
      statusClasses[this.status] || statusClasses.idle,
      this.className
    ].filter(Boolean).join(' ');
  }

  _updateUI() {
    if (!this.buttonElement) return;
    const buttonText = this.isRecording ? this.activeText : this.inactiveText;
    this.buttonElement.className = this._getButtonClasses();
    this.buttonElement.innerHTML = `
      ${buttonText}
      <span class="ml-2 inline-block h-2 w-2 rounded-full bg-white/70 animate-ping"></span>
    `;
    this.buttonElement.disabled = this.status === 'connecting' || this.status === 'error';
    this.buttonElement.setAttribute('aria-label', this.isRecording ? 'Stop recording' : 'Start recording');
    if (this.statusElement) {
      let statusText = '';
      let statusClass = '';
      if (this.status === 'connecting') {
        statusText = 'Connecting…';
        statusClass = 'text-amber-500';
      } else if (this.status === 'error') {
        statusText = 'Connection error. Try again.';
        statusClass = 'text-rose-600';
      }
      this.statusElement.textContent = statusText;
      this.statusElement.className = `text-xs font-medium ${statusClass}`;
    }
    this._triggerEvent('onStatusChange', this.getStatus());
  }

  _setStatus(newStatus) {
    this.status = newStatus;
    this._updateUI();
  }

  _triggerEvent(eventName, ...args) {
    this.eventCallbacks[eventName]?.forEach(callback => {
      try {
        callback(...args);
      } catch (error) {
        console.error(`Error in ${eventName} callback:`, error);
      }
    });
  }

  _cleanup() {
    clearTimeout(this.connectionTimer);
    // Clear recording blob
    this.recordingBlob = null;
    this._stopEverything('component_unmount');
    if (this.wsClient) {
      this.wsClient.disconnect('component_unmount');
    }
    if (this.audioService) {
      this.audioService.cleanup();
    }
    // NEW: Cleanup audio processor
    if (this.audioProcessor) {
      this.audioProcessor = null;
    }
  }
}

// Export for npm package
export default VocalCallSDK;