
react-native-deepgram


React Native SDK for Deepgram's AI-powered speech-to-text, real-time transcription, and text intelligence APIs. Supports live audio streaming, file transcription, sentiment analysis, and topic detection for iOS and Android.
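
The module below exports the voice-agent hook. A minimal sketch of how it might be wired into a screen follows; the component name, the settings values, and the direct assignment of globalThis.__DEEPGRAM_API_KEY__ are illustrative only (the hook reads the key from that global, but how the package normally populates it is outside this file), and a real session also needs an agent section filled in per Deepgram's voice-agent Settings schema.

import React, { useState } from 'react';
import { Button, ScrollView, Text } from 'react-native';
import { useDeepgramVoiceAgent } from 'react-native-deepgram';

// The hook reads the API key from this global (see the source below).
// Hard-coding a key is for illustration only; keep real keys off-device.
globalThis.__DEEPGRAM_API_KEY__ = 'YOUR_DEEPGRAM_API_KEY';

export function VoiceAgentScreen() {
  const [transcript, setTranscript] = useState([]);

  const agent = useDeepgramVoiceAgent({
    defaultSettings: {
      // linear16 matches the Int16 PCM the hook streams; sample_rate drives
      // the downsampling factor computed in connect().
      audio: { input: { encoding: 'linear16', sample_rate: 24000 } },
      // agent: { ... } goes here, per Deepgram's voice-agent Settings schema.
    },
    onConversationText: (msg) =>
      setTranscript((prev) => [...prev, `${msg.role}: ${msg.content}`]),
    onError: (err) => console.warn('Voice agent error', err),
  });

  return (
    <ScrollView>
      <Button title="Start" onPress={() => agent.connect().catch(console.warn)} />
      <Button title="Stop" onPress={() => agent.disconnect()} />
      {transcript.map((line, i) => (
        <Text key={i}>{line}</Text>
      ))}
    </ScrollView>
  );
}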

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.useDeepgramVoiceAgent = useDeepgramVoiceAgent; var _react = require("react"); var _reactNative = require("react-native"); var _NativeDeepgram = require("./NativeDeepgram.js"); var _askMicPermission = require("./helpers/askMicPermission.js"); const DEFAULT_AGENT_ENDPOINT = 'wss://agent.deepgram.com/v1/agent/converse'; const DEFAULT_INPUT_SAMPLE_RATE = 24_000; const BASE_NATIVE_SAMPLE_RATE = 48_000; const eventName = _reactNative.Platform.select({ ios: 'DeepgramAudioPCM', android: 'AudioChunk', default: 'DeepgramAudioPCM' }); const ensureArrayBuffer = data => { if (!data) return null; if (data instanceof ArrayBuffer) return data; if (ArrayBuffer.isView(data)) { const view = data; if (view.buffer instanceof ArrayBuffer) { return view.buffer.slice(view.byteOffset, view.byteOffset + view.byteLength); } const copy = new Uint8Array(view.byteLength); copy.set(new Uint8Array(view.buffer, view.byteOffset, view.byteLength)); return copy.buffer; } return null; }; const hasKeys = (value, keys) => typeof value === 'object' && value !== null && keys.every(key => key in value); const computeDownsampleFactor = target => { if (!target || target >= BASE_NATIVE_SAMPLE_RATE) { return 1; } const ratio = Math.round(BASE_NATIVE_SAMPLE_RATE / target); return ratio > 0 ? ratio : 1; }; function useDeepgramVoiceAgent({ endpoint = DEFAULT_AGENT_ENDPOINT, defaultSettings, autoStartMicrophone = true, downsampleFactor, onBeforeConnect, onConnect, onClose, onError, onMessage, onWelcome, onSettingsApplied, onConversationText, onAgentThinking, onAgentStartedSpeaking, onAgentAudioDone, onUserStartedSpeaking, onFunctionCallRequest, onFunctionCallResponse, onPromptUpdated, onSpeakUpdated, onInjectionRefused, onWarning, onServerError } = {}) { const ws = (0, _react.useRef)(null); const audioSub = (0, _react.useRef)(null); const currentDownsample = (0, _react.useRef)(downsampleFactor ?? 
computeDownsampleFactor(DEFAULT_INPUT_SAMPLE_RATE)); const microphoneActive = (0, _react.useRef)(false); const defaultSettingsRef = (0, _react.useRef)(defaultSettings); const endpointRef = (0, _react.useRef)(endpoint); const onBeforeConnectRef = (0, _react.useRef)(onBeforeConnect); const onConnectRef = (0, _react.useRef)(onConnect); const onCloseRef = (0, _react.useRef)(onClose); const onErrorRef = (0, _react.useRef)(onError); const onMessageRef = (0, _react.useRef)(onMessage); const onWelcomeRef = (0, _react.useRef)(onWelcome); const onSettingsAppliedRef = (0, _react.useRef)(onSettingsApplied); const onConversationTextRef = (0, _react.useRef)(onConversationText); const onAgentThinkingRef = (0, _react.useRef)(onAgentThinking); const onAgentStartedSpeakingRef = (0, _react.useRef)(onAgentStartedSpeaking); const onAgentAudioDoneRef = (0, _react.useRef)(onAgentAudioDone); const onUserStartedSpeakingRef = (0, _react.useRef)(onUserStartedSpeaking); const onFunctionCallRequestRef = (0, _react.useRef)(onFunctionCallRequest); const onFunctionCallResponseRef = (0, _react.useRef)(onFunctionCallResponse); const onPromptUpdatedRef = (0, _react.useRef)(onPromptUpdated); const onSpeakUpdatedRef = (0, _react.useRef)(onSpeakUpdated); const onInjectionRefusedRef = (0, _react.useRef)(onInjectionRefused); const onWarningRef = (0, _react.useRef)(onWarning); const onServerErrorRef = (0, _react.useRef)(onServerError); const autoStartMicRef = (0, _react.useRef)(autoStartMicrophone); const sanitizeAudioSettings = (0, _react.useCallback)(audio => { if (!audio) { return undefined; } const sanitized = {}; if (audio.input) { sanitized.input = { ...audio.input }; } Object.entries(audio).forEach(([key, value]) => { if (key === 'input' || key === 'output') { return; } sanitized[key] = value; }); return Object.keys(sanitized).length > 0 ? sanitized : undefined; }, []); const sanitizeAgentConfig = (0, _react.useCallback)(agent => { if (!agent) { return undefined; } const sanitized = {}; Object.entries(agent).forEach(([key, value]) => { if (key === 'speak') { return; } sanitized[key] = value; }); return Object.keys(sanitized).length > 0 ? 
sanitized : undefined; }, []); const sanitizeSettings = (0, _react.useCallback)(settings => { if (!settings) { return undefined; } const sanitized = {}; Object.entries(settings).forEach(([key, value]) => { if (key === 'audio') { const audio = sanitizeAudioSettings(value); if (audio) { sanitized.audio = audio; } return; } if (key === 'agent') { const agent = sanitizeAgentConfig(value); if (agent) { sanitized.agent = agent; } return; } sanitized[key] = value; }); return sanitized; }, [sanitizeAgentConfig, sanitizeAudioSettings]); defaultSettingsRef.current = defaultSettings; endpointRef.current = endpoint; onBeforeConnectRef.current = onBeforeConnect; onConnectRef.current = onConnect; onCloseRef.current = onClose; onErrorRef.current = onError; onMessageRef.current = onMessage; onWelcomeRef.current = onWelcome; onSettingsAppliedRef.current = onSettingsApplied; onConversationTextRef.current = onConversationText; onAgentThinkingRef.current = onAgentThinking; onAgentStartedSpeakingRef.current = onAgentStartedSpeaking; onAgentAudioDoneRef.current = onAgentAudioDone; onUserStartedSpeakingRef.current = onUserStartedSpeaking; onFunctionCallRequestRef.current = onFunctionCallRequest; onFunctionCallResponseRef.current = onFunctionCallResponse; onPromptUpdatedRef.current = onPromptUpdated; onSpeakUpdatedRef.current = onSpeakUpdated; onInjectionRefusedRef.current = onInjectionRefused; onWarningRef.current = onWarning; onServerErrorRef.current = onServerError; autoStartMicRef.current = autoStartMicrophone; if (downsampleFactor != null) { currentDownsample.current = downsampleFactor; } const cleanup = (0, _react.useCallback)(() => { audioSub.current?.remove(); audioSub.current = null; if (microphoneActive.current) { _NativeDeepgram.Deepgram.stopRecording().catch(() => {}); microphoneActive.current = false; } const socket = ws.current; if (socket) { ws.current = null; try { if (socket.readyState === WebSocket.OPEN || socket.readyState === WebSocket.CONNECTING) { socket.close(1000, 'cleanup'); } else { socket.close(); } } catch { // ignore socket close errors } } }, []); (0, _react.useEffect)(() => () => cleanup(), [cleanup]); const handleMicChunk = (0, _react.useCallback)(ev => { const socket = ws.current; if (!socket || socket.readyState !== WebSocket.OPEN) { return; } const factor = currentDownsample.current ?? 1; let chunk = null; if (typeof ev?.b64 === 'string') { const binary = Uint8Array.from(atob(ev.b64), c => c.charCodeAt(0)); const float32 = new Float32Array(binary.buffer); const downsampled = factor > 1 ? float32.filter((_, i) => i % factor === 0) : float32; const int16 = new Int16Array(downsampled.length); for (let i = 0; i < downsampled.length; i++) { const sample = Math.max(-1, Math.min(1, downsampled[i])); int16[i] = sample < 0 ? sample * 0x8000 : sample * 0x7fff; } chunk = int16.buffer; } else if (Array.isArray(ev?.data)) { const bytes = new Uint8Array(ev.data.length); for (let i = 0; i < ev.data.length; i++) { const value = ev.data[i]; bytes[i] = value < 0 ? 
value + 256 : value; } const view = new DataView(bytes.buffer); const int16 = new Int16Array(bytes.length / 2); for (let i = 0; i < int16.length; i++) { int16[i] = view.getInt16(i * 2, true); } chunk = int16.buffer; } if (!chunk) { return; } try { socket.send(chunk); } catch (err) { onErrorRef.current?.(err); } }, [onErrorRef]); const handleSocketMessage = (0, _react.useCallback)(ev => { if (typeof ev.data === 'string') { try { const message = JSON.parse(ev.data); onMessageRef.current?.(message); switch (message.type) { case 'Welcome': if (hasKeys(message, ['request_id'])) { onWelcomeRef.current?.(message); } break; case 'SettingsApplied': onSettingsAppliedRef.current?.(message); break; case 'ConversationText': if (hasKeys(message, ['role', 'content'])) { onConversationTextRef.current?.(message); } break; case 'AgentThinking': if (hasKeys(message, ['content'])) { onAgentThinkingRef.current?.(message); } break; case 'AgentStartedSpeaking': onAgentStartedSpeakingRef.current?.(message); break; case 'AgentAudioDone': onAgentAudioDoneRef.current?.(message); break; case 'UserStartedSpeaking': onUserStartedSpeakingRef.current?.(message); break; case 'FunctionCallRequest': if (hasKeys(message, ['functions'])) { onFunctionCallRequestRef.current?.(message); } break; case 'FunctionCallResponse': if (hasKeys(message, ['id', 'name'])) { onFunctionCallResponseRef.current?.(message); } break; case 'PromptUpdated': onPromptUpdatedRef.current?.(message); break; case 'SpeakUpdated': onSpeakUpdatedRef.current?.(message); break; case 'Audio': case 'AudioConfig': // Audio responses are ignored in text-only mode. break; case 'InjectionRefused': if (hasKeys(message, ['message'])) { onInjectionRefusedRef.current?.(message); } break; case 'Warning': if (hasKeys(message, ['description'])) { onWarningRef.current?.(message); } break; case 'Error': { const description = typeof message.description === 'string' ? message.description : undefined; const code = typeof message.code === 'string' ? message.code : undefined; if (description || code) { onServerErrorRef.current?.(message); } onErrorRef.current?.(new Error(description ?? code ?? 'Voice agent error')); } break; default: break; } } catch (err) { onErrorRef.current?.(err); } return; } const buffer = ensureArrayBuffer(ev.data); if (buffer) { // Binary audio responses are ignored in text-only mode. 
} }, [onAgentAudioDoneRef, onAgentStartedSpeakingRef, onAgentThinkingRef, onConversationTextRef, onErrorRef, onFunctionCallRequestRef, onFunctionCallResponseRef, onInjectionRefusedRef, onMessageRef, onPromptUpdatedRef, onServerErrorRef, onSettingsAppliedRef, onSpeakUpdatedRef, onUserStartedSpeakingRef, onWarningRef]); const sendJsonMessage = (0, _react.useCallback)(message => { const socket = ws.current; if (!socket || socket.readyState !== WebSocket.OPEN) { return false; } try { socket.send(JSON.stringify(message)); return true; } catch (err) { onErrorRef.current?.(err); return false; } }, []); const sendBinary = (0, _react.useCallback)(chunk => { const socket = ws.current; if (!socket || socket.readyState !== WebSocket.OPEN) { return false; } let payload = null; if (chunk instanceof ArrayBuffer) { payload = chunk; } else if (chunk instanceof Uint8Array) { if (chunk.buffer instanceof ArrayBuffer) { payload = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength); } else { const copy = new Uint8Array(chunk.byteLength); copy.set(chunk); payload = copy.buffer; } } else if (Array.isArray(chunk)) { const uint = new Uint8Array(chunk.length); for (let i = 0; i < chunk.length; i++) { uint[i] = chunk[i]; } payload = uint.buffer; } if (!payload) return false; try { socket.send(payload); return true; } catch (err) { onErrorRef.current?.(err); return false; } }, []); const connect = (0, _react.useCallback)(async overrideSettings => { cleanup(); onBeforeConnectRef.current?.(); const apiKey = globalThis.__DEEPGRAM_API_KEY__; if (!apiKey) throw new Error('Deepgram API key missing'); const shouldCaptureMic = autoStartMicRef.current; if (shouldCaptureMic) { const granted = await (0, _askMicPermission.askMicPermission)(); if (!granted) { throw new Error('Microphone permission denied'); } await _NativeDeepgram.Deepgram.startRecording(); microphoneActive.current = true; const emitter = new _reactNative.NativeEventEmitter(_reactNative.NativeModules.Deepgram); if (eventName) { audioSub.current = emitter.addListener(eventName, handleMicChunk); } } const sanitizedDefault = sanitizeSettings(defaultSettingsRef.current); const sanitizedOverride = sanitizeSettings(overrideSettings); const mergedSettings = { type: 'Settings', ...(sanitizedDefault ?? {}), ...(sanitizedOverride ?? {}) }; const targetSampleRate = overrideSettings?.audio?.input?.sample_rate ?? defaultSettingsRef.current?.audio?.input?.sample_rate ?? DEFAULT_INPUT_SAMPLE_RATE; currentDownsample.current = downsampleFactor ?? computeDownsampleFactor(targetSampleRate); const socket = new WebSocket(endpointRef.current, undefined, { headers: { Authorization: `Token ${apiKey}` } }); socket.binaryType = 'arraybuffer'; ws.current = socket; socket.onopen = () => { sendJsonMessage(mergedSettings); onConnectRef.current?.(); }; socket.onmessage = handleSocketMessage; socket.onerror = err => { onErrorRef.current?.(err); }; socket.onclose = event => { cleanup(); onCloseRef.current?.(event); }; }, [cleanup, downsampleFactor, handleMicChunk, handleSocketMessage, sanitizeSettings, sendJsonMessage]); const disconnect = (0, _react.useCallback)(() => { cleanup(); }, [cleanup]); const sendSettings = (0, _react.useCallback)(settings => { const sanitized = sanitizeSettings(settings); return sendJsonMessage({ type: 'Settings', ...(sanitized ?? 
{}) }); }, [sanitizeSettings, sendJsonMessage]); const injectUserMessage = (0, _react.useCallback)(content => sendJsonMessage({ type: 'InjectUserMessage', content }), [sendJsonMessage]); const injectAgentMessage = (0, _react.useCallback)(message => sendJsonMessage({ type: 'InjectAgentMessage', message }), [sendJsonMessage]); const sendFunctionCallResponse = (0, _react.useCallback)(response => sendJsonMessage({ type: 'FunctionCallResponse', ...response }), [sendJsonMessage]); const sendKeepAlive = (0, _react.useCallback)(() => sendJsonMessage({ type: 'KeepAlive' }), [sendJsonMessage]); const updatePrompt = (0, _react.useCallback)(prompt => sendJsonMessage({ type: 'UpdatePrompt', prompt }), [sendJsonMessage]); const sendMessage = (0, _react.useCallback)(message => sendJsonMessage(message), [sendJsonMessage]); const isConnected = (0, _react.useCallback)(() => ws.current?.readyState === WebSocket.OPEN, []); return { connect, disconnect, sendMessage, sendSettings, injectUserMessage, injectAgentMessage, sendFunctionCallResponse, sendKeepAlive, updatePrompt, sendMedia: sendBinary, isConnected }; } //# sourceMappingURL=useDeepgramVoiceAgent.js.map
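
Because the hook strips agent.speak and audio.output and discards binary audio frames (text-only mode), a session can also be driven without the microphone. Another hedged sketch under the same assumptions as above: the first user turn is injected from onConnect, which the source fires only after the socket is open and the Settings message has been sent, and a periodic KeepAlive holds an otherwise idle connection open (servers typically close idle sockets; the exact timeout is not defined in this file).

import React, { useEffect } from 'react';
import { useDeepgramVoiceAgent } from 'react-native-deepgram';

export function TextOnlyAgent() {
  const agent = useDeepgramVoiceAgent({
    autoStartMicrophone: false, // skip mic permission and native recording
    onConnect: () => {
      // Socket is open and Settings have been sent; injection is safe here.
      agent.injectUserMessage('Summarize my last conversation.');
    },
    onConversationText: (msg) => console.log(msg.role, msg.content),
  });

  useEffect(() => {
    agent.connect().catch(console.warn);
    // sendKeepAlive() is a no-op (returns false) while disconnected.
    const timer = setInterval(() => agent.sendKeepAlive(), 8000);
    return () => {
      clearInterval(timer);
      agent.disconnect(); // the hook also cleans up on unmount
    };
    // The hook's methods are stable useCallback results, so the empty
    // dependency array is safe for this sketch.
  }, []);

  return null;
}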