react-native-deepgram
React Native SDK for Deepgram's AI-powered speech-to-text, real-time transcription, and text intelligence APIs. Supports live audio streaming, file transcription, sentiment analysis, and topic detection for iOS and Android.
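A minimal sketch of consuming the hook compiled below (the component, the Settings shape, and the package re-export path are assumptions; the hook's API and the globalThis.__DEEPGRAM_API_KEY__ requirement come directly from this file):

import React from 'react';
import { Button } from 'react-native';
import { useDeepgramVoiceAgent } from 'react-native-deepgram';

// Assumption: the key must be provisioned before connect(); the hook reads this global.
globalThis.__DEEPGRAM_API_KEY__ = 'YOUR_DEEPGRAM_API_KEY';

export function AgentScreen() {
  const agent = useDeepgramVoiceAgent({
    // Assumed Settings shape per Deepgram's agent Settings message; the hook only
    // inspects audio.input.sample_rate and passes the rest through.
    defaultSettings: {
      audio: { input: { encoding: 'linear16', sample_rate: 24000 } },
    },
    onConversationText: (msg) => console.log(`${msg.role}: ${msg.content}`),
    onError: (err) => console.warn('agent error', err),
  });

  // connect() asks for mic permission, starts capture, and opens the socket.
  return <Button title="Start session" onPress={() => agent.connect().catch(console.warn)} />;
}

The hook tears the session down on unmount, so no explicit disconnect() call is needed in this sketch.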
useDeepgramVoiceAgent.js (JavaScript)
"use strict";
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.useDeepgramVoiceAgent = useDeepgramVoiceAgent;
var _react = require("react");
var _reactNative = require("react-native");
var _NativeDeepgram = require("./NativeDeepgram.js");
var _askMicPermission = require("./helpers/askMicPermission.js");
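// Native capture is fixed at 48 kHz; the agent defaults to 24 kHz linear16 input,
// so mic chunks are decimated (plain sample-dropping, no low-pass filter) before sending.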
const DEFAULT_AGENT_ENDPOINT = 'wss://agent.deepgram.com/v1/agent/converse';
const DEFAULT_INPUT_SAMPLE_RATE = 24_000;
const BASE_NATIVE_SAMPLE_RATE = 48_000;
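// The iOS and Android native modules emit microphone PCM under different event names.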
const eventName = _reactNative.Platform.select({
ios: 'DeepgramAudioPCM',
android: 'AudioChunk',
default: 'DeepgramAudioPCM'
});
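// Normalize a binary WebSocket payload (ArrayBuffer or any typed-array view) into a
// standalone ArrayBuffer, copying when the view's backing store can't be sliced directly.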
const ensureArrayBuffer = data => {
if (!data) return null;
if (data instanceof ArrayBuffer) return data;
if (ArrayBuffer.isView(data)) {
const view = data;
if (view.buffer instanceof ArrayBuffer) {
return view.buffer.slice(view.byteOffset, view.byteOffset + view.byteLength);
}
const copy = new Uint8Array(view.byteLength);
copy.set(new Uint8Array(view.buffer, view.byteOffset, view.byteLength));
return copy.buffer;
}
return null;
};
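// Cheap structural guard: only dispatch a typed callback when the expected fields exist.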
const hasKeys = (value, keys) => typeof value === 'object' && value !== null && keys.every(key => key in value);
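// Integer decimation factor from the 48 kHz capture rate down to the requested input rate.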
const computeDownsampleFactor = target => {
if (!target || target >= BASE_NATIVE_SAMPLE_RATE) {
return 1;
}
const ratio = Math.round(BASE_NATIVE_SAMPLE_RATE / target);
return ratio > 0 ? ratio : 1;
};
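/**
 * React hook that manages a Deepgram voice-agent WebSocket session: microphone
 * capture, Settings negotiation, and typed callbacks for each server event.
 * Every callback prop is mirrored into a ref so the socket handlers stay stable
 * across renders without capturing stale closures.
 */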
function useDeepgramVoiceAgent({
endpoint = DEFAULT_AGENT_ENDPOINT,
defaultSettings,
autoStartMicrophone = true,
downsampleFactor,
onBeforeConnect,
onConnect,
onClose,
onError,
onMessage,
onWelcome,
onSettingsApplied,
onConversationText,
onAgentThinking,
onAgentStartedSpeaking,
onAgentAudioDone,
onUserStartedSpeaking,
onFunctionCallRequest,
onFunctionCallResponse,
onPromptUpdated,
onSpeakUpdated,
onInjectionRefused,
onWarning,
onServerError
} = {}) {
const ws = (0, _react.useRef)(null);
const audioSub = (0, _react.useRef)(null);
const currentDownsample = (0, _react.useRef)(downsampleFactor ?? computeDownsampleFactor(DEFAULT_INPUT_SAMPLE_RATE));
const microphoneActive = (0, _react.useRef)(false);
const defaultSettingsRef = (0, _react.useRef)(defaultSettings);
const endpointRef = (0, _react.useRef)(endpoint);
const onBeforeConnectRef = (0, _react.useRef)(onBeforeConnect);
const onConnectRef = (0, _react.useRef)(onConnect);
const onCloseRef = (0, _react.useRef)(onClose);
const onErrorRef = (0, _react.useRef)(onError);
const onMessageRef = (0, _react.useRef)(onMessage);
const onWelcomeRef = (0, _react.useRef)(onWelcome);
const onSettingsAppliedRef = (0, _react.useRef)(onSettingsApplied);
const onConversationTextRef = (0, _react.useRef)(onConversationText);
const onAgentThinkingRef = (0, _react.useRef)(onAgentThinking);
const onAgentStartedSpeakingRef = (0, _react.useRef)(onAgentStartedSpeaking);
const onAgentAudioDoneRef = (0, _react.useRef)(onAgentAudioDone);
const onUserStartedSpeakingRef = (0, _react.useRef)(onUserStartedSpeaking);
const onFunctionCallRequestRef = (0, _react.useRef)(onFunctionCallRequest);
const onFunctionCallResponseRef = (0, _react.useRef)(onFunctionCallResponse);
const onPromptUpdatedRef = (0, _react.useRef)(onPromptUpdated);
const onSpeakUpdatedRef = (0, _react.useRef)(onSpeakUpdated);
const onInjectionRefusedRef = (0, _react.useRef)(onInjectionRefused);
const onWarningRef = (0, _react.useRef)(onWarning);
const onServerErrorRef = (0, _react.useRef)(onServerError);
const autoStartMicRef = (0, _react.useRef)(autoStartMicrophone);
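// Strip audio.output from outgoing settings: this hook runs text-only and ignores agent audio.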
const sanitizeAudioSettings = (0, _react.useCallback)(audio => {
if (!audio) {
return undefined;
}
const sanitized = {};
if (audio.input) {
sanitized.input = {
...audio.input
};
}
Object.entries(audio).forEach(([key, value]) => {
if (key === 'input' || key === 'output') {
return;
}
sanitized[key] = value;
});
return Object.keys(sanitized).length > 0 ? sanitized : undefined;
}, []);
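// Strip agent.speak for the same reason: no TTS is played on-device.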
const sanitizeAgentConfig = (0, _react.useCallback)(agent => {
if (!agent) {
return undefined;
}
const sanitized = {};
Object.entries(agent).forEach(([key, value]) => {
if (key === 'speak') {
return;
}
sanitized[key] = value;
});
return Object.keys(sanitized).length > 0 ? sanitized : undefined;
}, []);
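// Apply both sanitizers to a full Settings payload, passing all other keys through verbatim.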
const sanitizeSettings = (0, _react.useCallback)(settings => {
if (!settings) {
return undefined;
}
const sanitized = {};
Object.entries(settings).forEach(([key, value]) => {
if (key === 'audio') {
const audio = sanitizeAudioSettings(value);
if (audio) {
sanitized.audio = audio;
}
return;
}
if (key === 'agent') {
const agent = sanitizeAgentConfig(value);
if (agent) {
sanitized.agent = agent;
}
return;
}
sanitized[key] = value;
});
return sanitized;
}, [sanitizeAgentConfig, sanitizeAudioSettings]);
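// Mirror the latest props into refs on every render so the stable callbacks below
// always read fresh values.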
defaultSettingsRef.current = defaultSettings;
endpointRef.current = endpoint;
onBeforeConnectRef.current = onBeforeConnect;
onConnectRef.current = onConnect;
onCloseRef.current = onClose;
onErrorRef.current = onError;
onMessageRef.current = onMessage;
onWelcomeRef.current = onWelcome;
onSettingsAppliedRef.current = onSettingsApplied;
onConversationTextRef.current = onConversationText;
onAgentThinkingRef.current = onAgentThinking;
onAgentStartedSpeakingRef.current = onAgentStartedSpeaking;
onAgentAudioDoneRef.current = onAgentAudioDone;
onUserStartedSpeakingRef.current = onUserStartedSpeaking;
onFunctionCallRequestRef.current = onFunctionCallRequest;
onFunctionCallResponseRef.current = onFunctionCallResponse;
onPromptUpdatedRef.current = onPromptUpdated;
onSpeakUpdatedRef.current = onSpeakUpdated;
onInjectionRefusedRef.current = onInjectionRefused;
onWarningRef.current = onWarning;
onServerErrorRef.current = onServerError;
autoStartMicRef.current = autoStartMicrophone;
if (downsampleFactor != null) {
currentDownsample.current = downsampleFactor;
}
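// Tear down the mic subscription, native recorder, and socket; also runs on unmount.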
const cleanup = (0, _react.useCallback)(() => {
audioSub.current?.remove();
audioSub.current = null;
if (microphoneActive.current) {
_NativeDeepgram.Deepgram.stopRecording().catch(() => {});
microphoneActive.current = false;
}
const socket = ws.current;
if (socket) {
ws.current = null;
try {
if (socket.readyState === WebSocket.OPEN || socket.readyState === WebSocket.CONNECTING) {
socket.close(1000, 'cleanup');
} else {
socket.close();
}
} catch {
// ignore socket close errors
}
}
}, []);
(0, _react.useEffect)(() => () => cleanup(), [cleanup]);
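// Forward microphone PCM over the socket. Handles both native payload shapes:
// base64-encoded Float32 samples (decimated, then clamped and scaled to Int16)
// and signed byte arrays already containing little-endian Int16 PCM.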
const handleMicChunk = (0, _react.useCallback)(ev => {
const socket = ws.current;
if (!socket || socket.readyState !== WebSocket.OPEN) {
return;
}
const factor = currentDownsample.current ?? 1;
let chunk = null;
if (typeof ev?.b64 === 'string') {
const binary = Uint8Array.from(atob(ev.b64), c => c.charCodeAt(0));
const float32 = new Float32Array(binary.buffer);
const downsampled = factor > 1 ? float32.filter((_, i) => i % factor === 0) : float32;
const int16 = new Int16Array(downsampled.length);
for (let i = 0; i < downsampled.length; i++) {
const sample = Math.max(-1, Math.min(1, downsampled[i]));
int16[i] = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
}
chunk = int16.buffer;
} else if (Array.isArray(ev?.data)) {
const bytes = new Uint8Array(ev.data.length);
for (let i = 0; i < ev.data.length; i++) {
const value = ev.data[i];
bytes[i] = value < 0 ? value + 256 : value;
}
const view = new DataView(bytes.buffer);
const int16 = new Int16Array(bytes.length / 2);
for (let i = 0; i < int16.length; i++) {
int16[i] = view.getInt16(i * 2, true);
}
chunk = int16.buffer;
}
if (!chunk) {
return;
}
try {
socket.send(chunk);
} catch (err) {
onErrorRef.current?.(err);
}
}, [onErrorRef]);
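// Parse server JSON and fan out to the matching typed callback; binary frames are
// ignored because this hook operates in text-only mode.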
const handleSocketMessage = (0, _react.useCallback)(ev => {
if (typeof ev.data === 'string') {
try {
const message = JSON.parse(ev.data);
onMessageRef.current?.(message);
switch (message.type) {
case 'Welcome':
if (hasKeys(message, ['request_id'])) {
onWelcomeRef.current?.(message);
}
break;
case 'SettingsApplied':
onSettingsAppliedRef.current?.(message);
break;
case 'ConversationText':
if (hasKeys(message, ['role', 'content'])) {
onConversationTextRef.current?.(message);
}
break;
case 'AgentThinking':
if (hasKeys(message, ['content'])) {
onAgentThinkingRef.current?.(message);
}
break;
case 'AgentStartedSpeaking':
onAgentStartedSpeakingRef.current?.(message);
break;
case 'AgentAudioDone':
onAgentAudioDoneRef.current?.(message);
break;
case 'UserStartedSpeaking':
onUserStartedSpeakingRef.current?.(message);
break;
case 'FunctionCallRequest':
if (hasKeys(message, ['functions'])) {
onFunctionCallRequestRef.current?.(message);
}
break;
case 'FunctionCallResponse':
if (hasKeys(message, ['id', 'name'])) {
onFunctionCallResponseRef.current?.(message);
}
break;
case 'PromptUpdated':
onPromptUpdatedRef.current?.(message);
break;
case 'SpeakUpdated':
onSpeakUpdatedRef.current?.(message);
break;
case 'Audio':
case 'AudioConfig':
// Audio responses are ignored in text-only mode.
break;
case 'InjectionRefused':
if (hasKeys(message, ['message'])) {
onInjectionRefusedRef.current?.(message);
}
break;
case 'Warning':
if (hasKeys(message, ['description'])) {
onWarningRef.current?.(message);
}
break;
case 'Error':
{
const description = typeof message.description === 'string' ? message.description : undefined;
const code = typeof message.code === 'string' ? message.code : undefined;
if (description || code) {
onServerErrorRef.current?.(message);
}
onErrorRef.current?.(new Error(description ?? code ?? 'Voice agent error'));
}
break;
default:
break;
}
} catch (err) {
onErrorRef.current?.(err);
}
return;
}
const buffer = ensureArrayBuffer(ev.data);
if (buffer) {
// Binary audio responses are ignored in text-only mode.
}
}, [onAgentAudioDoneRef, onAgentStartedSpeakingRef, onAgentThinkingRef, onConversationTextRef, onErrorRef, onFunctionCallRequestRef, onFunctionCallResponseRef, onInjectionRefusedRef, onMessageRef, onPromptUpdatedRef, onServerErrorRef, onSettingsAppliedRef, onSpeakUpdatedRef, onUserStartedSpeakingRef, onWarningRef]);
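// Send one JSON frame; returns false when the socket isn't open or the send throws.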
const sendJsonMessage = (0, _react.useCallback)(message => {
const socket = ws.current;
if (!socket || socket.readyState !== WebSocket.OPEN) {
return false;
}
try {
socket.send(JSON.stringify(message));
return true;
} catch (err) {
onErrorRef.current?.(err);
return false;
}
}, []);
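// Send raw media bytes (ArrayBuffer, Uint8Array, or plain byte array) over the socket.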
const sendBinary = (0, _react.useCallback)(chunk => {
const socket = ws.current;
if (!socket || socket.readyState !== WebSocket.OPEN) {
return false;
}
let payload = null;
if (chunk instanceof ArrayBuffer) {
payload = chunk;
} else if (chunk instanceof Uint8Array) {
if (chunk.buffer instanceof ArrayBuffer) {
payload = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength);
} else {
const copy = new Uint8Array(chunk.byteLength);
copy.set(chunk);
payload = copy.buffer;
}
} else if (Array.isArray(chunk)) {
const uint = new Uint8Array(chunk.length);
for (let i = 0; i < chunk.length; i++) {
uint[i] = chunk[i];
}
payload = uint.buffer;
}
if (!payload) return false;
try {
socket.send(payload);
return true;
} catch (err) {
onErrorRef.current?.(err);
return false;
}
}, []);
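// Open a fresh session: tear down any previous one, request mic permission and start
// native capture (when autoStartMicrophone is on), recompute the decimation factor,
// then connect and send the merged Settings message once the socket opens.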
const connect = (0, _react.useCallback)(async overrideSettings => {
cleanup();
onBeforeConnectRef.current?.();
const apiKey = globalThis.__DEEPGRAM_API_KEY__;
if (!apiKey) throw new Error('Deepgram API key missing');
const shouldCaptureMic = autoStartMicRef.current;
if (shouldCaptureMic) {
const granted = await (0, _askMicPermission.askMicPermission)();
if (!granted) {
throw new Error('Microphone permission denied');
}
await _NativeDeepgram.Deepgram.startRecording();
microphoneActive.current = true;
const emitter = new _reactNative.NativeEventEmitter(_reactNative.NativeModules.Deepgram);
if (eventName) {
audioSub.current = emitter.addListener(eventName, handleMicChunk);
}
}
const sanitizedDefault = sanitizeSettings(defaultSettingsRef.current);
const sanitizedOverride = sanitizeSettings(overrideSettings);
const mergedSettings = {
type: 'Settings',
...(sanitizedDefault ?? {}),
...(sanitizedOverride ?? {})
};
const targetSampleRate = overrideSettings?.audio?.input?.sample_rate ?? defaultSettingsRef.current?.audio?.input?.sample_rate ?? DEFAULT_INPUT_SAMPLE_RATE;
currentDownsample.current = downsampleFactor ?? computeDownsampleFactor(targetSampleRate);
const socket = new WebSocket(endpointRef.current, undefined, {
headers: {
Authorization: `Token ${apiKey}`
}
});
socket.binaryType = 'arraybuffer';
ws.current = socket;
socket.onopen = () => {
sendJsonMessage(mergedSettings);
onConnectRef.current?.();
};
socket.onmessage = handleSocketMessage;
socket.onerror = err => {
onErrorRef.current?.(err);
};
socket.onclose = event => {
cleanup();
onCloseRef.current?.(event);
};
}, [cleanup, downsampleFactor, handleMicChunk, handleSocketMessage, sanitizeSettings, sendJsonMessage]);
const disconnect = (0, _react.useCallback)(() => {
cleanup();
}, [cleanup]);
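// Thin wrappers over sendJsonMessage for the agent's client-to-server message types.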
const sendSettings = (0, _react.useCallback)(settings => {
const sanitized = sanitizeSettings(settings);
return sendJsonMessage({
type: 'Settings',
...(sanitized ?? {})
});
}, [sanitizeSettings, sendJsonMessage]);
const injectUserMessage = (0, _react.useCallback)(content => sendJsonMessage({
type: 'InjectUserMessage',
content
}), [sendJsonMessage]);
const injectAgentMessage = (0, _react.useCallback)(message => sendJsonMessage({
type: 'InjectAgentMessage',
message
}), [sendJsonMessage]);
const sendFunctionCallResponse = (0, _react.useCallback)(response => sendJsonMessage({
type: 'FunctionCallResponse',
...response
}), [sendJsonMessage]);
const sendKeepAlive = (0, _react.useCallback)(() => sendJsonMessage({
type: 'KeepAlive'
}), [sendJsonMessage]);
const updatePrompt = (0, _react.useCallback)(prompt => sendJsonMessage({
type: 'UpdatePrompt',
prompt
}), [sendJsonMessage]);
const sendMessage = (0, _react.useCallback)(message => sendJsonMessage(message), [sendJsonMessage]);
const isConnected = (0, _react.useCallback)(() => ws.current?.readyState === WebSocket.OPEN, []);
return {
connect,
disconnect,
sendMessage,
sendSettings,
injectUserMessage,
injectAgentMessage,
sendFunctionCallResponse,
sendKeepAlive,
updatePrompt,
sendMedia: sendBinary,
isConnected
};
}
//# sourceMappingURL=useDeepgramVoiceAgent.js.map