react-native-deepgram
React Native SDK for Deepgram's AI-powered speech-to-text, real-time transcription, and text intelligence APIs. Supports live audio streaming, file transcription, sentiment analysis, and topic detection for iOS and Android.
import { useRef, useCallback, useState, useEffect } from 'react';
import { NativeEventEmitter, NativeModules, Platform } from 'react-native';
import { Deepgram } from "./NativeDeepgram.js";
import { askMicPermission } from "./helpers/askMicPermission.js";
import { DEEPGRAM_BASEURL, DEEPGRAM_BASEWSS, DEEPGRAM_V2_BASEWSS } from "./constants/index.js";
import { buildParams } from "./helpers/index.js";
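// The native recorder is assumed to capture PCM16 at 16 kHz; when a lower target
// sample rate is requested, chunks are decimated before being sent over the socket.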
const DEFAULT_SAMPLE_RATE = 16_000;
const BASE_NATIVE_SAMPLE_RATE = 16_000;
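// Integer decimation factor for converting from the native capture rate to the
// requested target rate; returns 1 (no downsampling) when no reduction is needed.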
const computeDownsampleFactor = (target, base = BASE_NATIVE_SAMPLE_RATE) => {
if (!target || target >= base || base <= 0) {
return 1;
}
const ratio = Math.round(base / target);
return ratio > 0 ? ratio : 1;
};
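// Downsample PCM16 by keeping every `factor`-th sample. This is plain decimation
// with no anti-aliasing filter, which is generally adequate for speech input.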
const downsampleInt16 = (data, factor) => {
if (factor <= 1 || data.length < factor) {
return data;
}
const downsampled = new Int16Array(Math.floor(data.length / factor));
for (let i = 0; i < downsampled.length; i++) {
downsampled[i] = data[i * factor];
}
return downsampled;
};
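/**
 * Hook wrapping Deepgram live transcription (WebSocket) and prerecorded file
 * transcription (REST) behind one interface. Expects the API key to have been
 * placed on globalThis.__DEEPGRAM_API_KEY__ before use.
 *
 * Illustrative usage (a sketch; the option values shown are examples, not defaults):
 *
 * @example
 * const { startListening, stopListening, transcript } = useDeepgramSpeechToText({
 *   live: { model: 'nova-2', language: 'en', interimResults: true },
 *   trackTranscript: true,
 *   onError: err => console.warn(err),
 * });
 * // ...call startListening() on a button press, stopListening() to end the session.
 */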
export function useDeepgramSpeechToText({
onBeforeStart = () => {},
onStart = () => {},
onTranscript = () => {},
onError = () => {},
onEnd = () => {},
onBeforeTranscribe = () => {},
onTranscribeSuccess = () => {},
onTranscribeError = () => {},
live = {},
prerecorded = {},
trackState = false,
trackTranscript = false
} = {}) {
const [internalState, setInternalState] = useState({
status: 'idle',
error: null
});
const ws = useRef(null);
const audioSub = useRef(null);
const apiVersionRef = useRef('v1');
const nativeInputSampleRateRef = useRef(BASE_NATIVE_SAMPLE_RATE);
const targetSampleRateRef = useRef(DEFAULT_SAMPLE_RATE);
const downsampleFactorRef = useRef(1);
const lastPartialTranscriptRef = useRef('');
const lastFinalTranscriptRef = useRef('');
// Transcript tracking state
const [internalTranscript, setInternalTranscript] = useState('');
const [internalInterimTranscript, setInternalInterimTranscript] = useState('');
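// Tear down the audio subscription, native recorder, and WebSocket, and reset all
// per-session refs. For v2 a CloseStream message is sent before closing the socket.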
const closeEverything = useCallback(() => {
if (audioSub.current) {
audioSub.current.remove();
audioSub.current = null;
}
Deepgram.stopRecording().catch(() => {});
nativeInputSampleRateRef.current = BASE_NATIVE_SAMPLE_RATE;
targetSampleRateRef.current = DEFAULT_SAMPLE_RATE;
downsampleFactorRef.current = 1;
lastPartialTranscriptRef.current = '';
lastFinalTranscriptRef.current = '';
if (apiVersionRef.current === 'v2' && ws.current?.readyState === WebSocket.OPEN) {
try {
ws.current.send(JSON.stringify({
type: 'CloseStream'
}));
} catch {
// ignore close errors
}
}
ws.current?.close(1000, 'cleanup');
ws.current = null;
apiVersionRef.current = 'v1';
if (trackState) {
setInternalState(prev => ({
...prev,
status: 'idle'
}));
}
if (trackTranscript) {
setInternalTranscript('');
setInternalInterimTranscript('');
}
}, [trackState, trackTranscript]);
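// Normalize and de-duplicate transcripts before invoking onTranscript: repeated
// identical partials and finals are dropped, and finals are appended to tracked state.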
const emitTranscript = useCallback((transcript, isFinal, raw) => {
if (typeof onTranscript !== 'function') {
return;
}
if (typeof transcript !== 'string') {
return;
}
const normalized = transcript.trim();
if (!normalized) {
return;
}
if (isFinal) {
if (lastFinalTranscriptRef.current === normalized) {
return;
}
lastFinalTranscriptRef.current = normalized;
lastPartialTranscriptRef.current = '';
} else {
if (lastPartialTranscriptRef.current === normalized) {
return;
}
lastPartialTranscriptRef.current = normalized;
}
const info = {
isFinal: !!isFinal,
raw
};
if (trackTranscript) {
if (isFinal) {
setInternalTranscript(prev => {
const next = prev ? `${prev} ${normalized}` : normalized;
return next.trim();
});
setInternalInterimTranscript('');
} else {
setInternalInterimTranscript(normalized);
}
}
onTranscript(normalized, info);
}, [onTranscript, trackTranscript]);
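// Start the microphone and open the live WebSocket. Options merge as:
// built-in defaults < `live` hook config < per-call overrides.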
const startListening = useCallback(async (overrideOptions = {}) => {
try {
onBeforeStart();
if (trackState) {
setInternalState({
status: 'loading',
error: null
});
}
lastPartialTranscriptRef.current = '';
lastFinalTranscriptRef.current = '';
const granted = await askMicPermission();
if (!granted) throw new Error('Microphone permission denied');
await Deepgram.startRecording();
const apiKey = globalThis.__DEEPGRAM_API_KEY__;
if (!apiKey) throw new Error('Deepgram API key missing');
const merged = {
encoding: 'linear16',
sampleRate: DEFAULT_SAMPLE_RATE,
apiVersion: 'v1',
...live,
...overrideOptions
};
targetSampleRateRef.current = typeof merged.sampleRate === 'number' && merged.sampleRate > 0 ? merged.sampleRate : DEFAULT_SAMPLE_RATE;
downsampleFactorRef.current = computeDownsampleFactor(targetSampleRateRef.current, nativeInputSampleRateRef.current);
const isV2 = merged.apiVersion === 'v2';
apiVersionRef.current = isV2 ? 'v2' : 'v1';
// Default model depends on the API version: Flux on v2, Nova-2 on v1. Choosing it
// after the merge keeps an explicit model passed via `live` or overrides intact.
if (!merged.model) {
merged.model = isV2 ? 'flux-general-en' : 'nova-2';
}
const query = {
callback: merged.callback,
callback_method: merged.callbackMethod,
channels: merged.channels,
diarize: merged.diarize,
dictation: merged.dictation,
encoding: merged.encoding,
endpointing: merged.endpointing,
filler_words: merged.fillerWords,
interim_results: merged.interimResults,
keyterm: merged.keyterm,
keywords: merged.keywords,
language: merged.language,
mip_opt_out: merged.mipOptOut,
model: merged.model,
multichannel: merged.multichannel,
numerals: merged.numerals,
profanity_filter: merged.profanityFilter,
punctuate: merged.punctuate,
replace: merged.replace,
sample_rate: merged.sampleRate,
search: merged.search,
smart_format: merged.smartFormat,
tag: merged.tag,
utterance_end_ms: merged.utteranceEndMs,
vad_events: merged.vadEvents,
version: merged.version
};
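// End-of-turn (EOT) thresholds below are only sent to the v2 (Flux) endpoint.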
if (isV2) {
query.eager_eot_threshold = merged.eagerEotThreshold;
query.eot_threshold = merged.eotThreshold;
query.eot_timeout_ms = merged.eotTimeoutMs;
}
if (merged.redact) {
query.redact = Array.isArray(merged.redact) ? merged.redact : [merged.redact];
}
if (merged.extra) {
Object.entries(merged.extra).forEach(([key, value]) => {
query[`extra.${key}`] = value;
});
}
const params = buildParams(query);
const baseWss = isV2 ? DEEPGRAM_V2_BASEWSS : DEEPGRAM_BASEWSS;
const baseListenUrl = `${baseWss}/listen`;
const url = params ? `${baseListenUrl}?${params}` : baseListenUrl;
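// React Native's WebSocket accepts an options object with custom headers as a
// third argument; this is how the Authorization token is attached (browsers lack this).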
ws.current = new WebSocket(url, undefined, {
headers: {
Authorization: `Token ${apiKey}`
}
});
ws.current.onopen = () => {
onStart();
if (trackState) {
setInternalState({
status: 'listening',
error: null
});
}
};
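// Subscribe to raw PCM chunks from the native recorder. The event name differs
// per platform; the handler accepts either a base64 payload or a signed byte array.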
const emitter = new NativeEventEmitter(NativeModules.Deepgram);
audioSub.current = emitter.addListener(Platform.select({
ios: 'DeepgramAudioPCM',
android: 'AudioChunk'
}), ev => {
if (typeof ev?.sampleRate === 'number' && ev.sampleRate > 0) {
if (ev.sampleRate !== nativeInputSampleRateRef.current) {
nativeInputSampleRateRef.current = ev.sampleRate;
downsampleFactorRef.current = computeDownsampleFactor(targetSampleRateRef.current, nativeInputSampleRateRef.current);
}
}
const factor = downsampleFactorRef.current;
let chunk;
if (typeof ev?.b64 === 'string') {
const bytes = Uint8Array.from(atob(ev.b64), c => c.charCodeAt(0));
let int16 = new Int16Array(bytes.buffer);
int16 = downsampleInt16(int16, factor);
chunk = int16.buffer;
} else if (Array.isArray(ev?.data)) {
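// Signed-byte path: map -128..127 bytes to 0..255, then read little-endian PCM16.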
const bytes = new Uint8Array(ev.data.length);
for (let i = 0; i < ev.data.length; i++) {
const v = ev.data[i];
bytes[i] = v < 0 ? v + 256 : v;
}
const view = new DataView(bytes.buffer);
const int16 = new Int16Array(bytes.length / 2);
for (let i = 0; i < int16.length; i++) {
int16[i] = view.getInt16(i * 2, true);
}
const downsampled = downsampleInt16(int16, factor);
chunk = downsampled.buffer;
}
if (chunk && ws.current?.readyState === WebSocket.OPEN) {
ws.current.send(chunk);
}
});
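// v1 responses nest the transcript under channel.alternatives; v2 (Flux) carries
// `transcript` at the top level together with turn-state metadata.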
ws.current.onmessage = ev => {
if (typeof ev.data === 'string') {
try {
const msg = JSON.parse(ev.data);
if (isV2) {
if (msg.type === 'Error') {
const description = msg.description || 'Deepgram stream error';
onError(new Error(description));
if (trackState) {
setInternalState({
status: 'error',
error: new Error(description)
});
}
closeEverything();
return;
}
const transcript = msg.transcript;
if (typeof transcript === 'string' && transcript.length > 0) {
const type = typeof msg.type === 'string' ? msg.type.toLowerCase() : undefined;
const isFinal = msg.is_final === true || msg.speech_final === true || msg.finished === true || type === 'utteranceend' || type === 'speechfinal' || type === 'speech.end' || (typeof type === 'string' && type.includes('final'));
emitTranscript(transcript, Boolean(isFinal), msg);
}
return;
}
const transcript = msg.channel?.alternatives?.[0]?.transcript;
if (typeof transcript === 'string') {
const isFinal = msg.is_final === true || msg.speech_final === true;
emitTranscript(transcript, Boolean(isFinal), msg);
}
} catch {
// non-JSON or unexpected format
}
}
};
ws.current.onerror = err => {
onError(err);
if (trackState) {
setInternalState({
status: 'error',
error: err instanceof Error ? err : new Error(String(err))
});
}
};
ws.current.onclose = () => {
onEnd();
closeEverything();
};
} catch (err) {
// Check if this is a concurrent recording error
const errorMessage = err instanceof Error ? err.message : String(err);
const isConcurrentError = errorMessage.includes('concurrent_recording_not_supported') || errorMessage.includes('another recording session is already active');
if (isConcurrentError) {
const friendlyError = new Error('Cannot start recording: another recording session is already active. Only one live listening session is supported at a time. Please stop the active session before starting a new one.');
onError(friendlyError);
if (trackState) {
setInternalState({
status: 'error',
error: friendlyError
});
}
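// Note: closeEverything() is not called in this branch, so the already-active
// session keeps running.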
} else {
onError(err);
if (trackState) {
setInternalState({
status: 'error',
error: err instanceof Error ? err : new Error(String(err))
});
}
closeEverything();
}
}
}, [emitTranscript, onBeforeStart, onStart, onError, onEnd, live, closeEverything, trackState]);
const stopListening = useCallback(() => {
try {
closeEverything();
onEnd();
} catch (err) {
onError(err);
if (trackState) {
setInternalState({
status: 'error',
error: err instanceof Error ? err : new Error(String(err))
});
}
}
}, [onEnd, onError, closeEverything, trackState]);
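// Transcribe prerecorded audio via the REST endpoint. Accepts a remote URL
// (string or { url }) sent as JSON, or a local file/Blob uploaded as multipart form data.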
const transcribeFile = useCallback(async (file, overrideOptions = {}) => {
onBeforeTranscribe();
if (trackState) {
setInternalState({
status: 'transcribing',
error: null
});
}
try {
const apiKey = globalThis.__DEEPGRAM_API_KEY__;
if (!apiKey) throw new Error('Deepgram API key missing');
const merged = {
...prerecorded,
...overrideOptions
};
const query = {
callback: merged.callback,
callback_method: merged.callbackMethod,
sentiment: merged.sentiment,
summarize: merged.summarize,
tag: merged.tag,
topics: merged.topics,
custom_topic_mode: merged.customTopicMode,
intents: merged.intents,
custom_intent_mode: merged.customIntentMode,
detect_entities: merged.detectEntities,
diarize: merged.diarize,
dictation: merged.dictation,
encoding: merged.encoding,
filler_words: merged.fillerWords,
keyterm: merged.keyterm,
keywords: merged.keywords,
language: merged.language,
measurements: merged.measurements,
model: merged.model,
multichannel: merged.multichannel,
numerals: merged.numerals,
paragraphs: merged.paragraphs,
profanity_filter: merged.profanityFilter,
punctuate: merged.punctuate,
replace: merged.replace,
search: merged.search,
smart_format: merged.smartFormat,
utterances: merged.utterances,
utt_split: merged.uttSplit,
version: merged.version
};
if (merged.customTopic) {
query.custom_topic = merged.customTopic;
}
if (merged.customIntent) {
query.custom_intent = merged.customIntent;
}
if (merged.detectLanguage !== undefined) {
query.detect_language = merged.detectLanguage;
}
if (merged.redact) {
query.redact = Array.isArray(merged.redact) ? merged.redact : [merged.redact];
}
if (merged.extra) {
if (typeof merged.extra === 'string' || Array.isArray(merged.extra)) {
query.extra = merged.extra;
} else {
Object.entries(merged.extra).forEach(([key, value]) => {
if (value == null) return;
query[`extra.${key}`] = value;
});
}
}
const params = buildParams(query);
const baseUrl = `${DEEPGRAM_BASEURL}/listen`;
const url = params ? `${baseUrl}?${params}` : baseUrl;
const headers = {
Authorization: `Token ${apiKey}`
};
let body;
if (typeof file === 'string') {
headers['Content-Type'] = 'application/json';
body = JSON.stringify({
url: file
});
} else if (typeof file === 'object' && file !== null && 'url' in file) {
headers['Content-Type'] = 'application/json';
body = JSON.stringify({
url: file.url
});
} else {
const formData = new FormData();
if (file instanceof Blob) {
formData.append('audio', file, 'recording.wav');
} else {
formData.append('audio', {
uri: file.uri,
name: file.name || 'recording.wav',
type: file.type || 'audio/wav'
});
}
body = formData;
}
const res = await fetch(url, {
method: 'POST',
headers,
body
});
if (!res.ok) {
const errBody = await res.text();
throw new Error(`HTTP ${res.status}: ${errBody}`);
}
const json = await res.json();
const transcript = json.results?.channels?.[0]?.alternatives?.[0]?.transcript;
if (transcript) {
onTranscribeSuccess(transcript);
if (trackState) {
setInternalState({
status: 'idle',
error: null
});
}
} else {
throw new Error('No transcript present in Deepgram response');
}
} catch (err) {
onTranscribeError(err);
if (trackState) {
setInternalState({
status: 'error',
error: err instanceof Error ? err : new Error(String(err))
});
}
}
}, [onBeforeTranscribe, onTranscribeSuccess, onTranscribeError, prerecorded, trackState]);
// Cleanup on unmount - stop recording if component unmounts while listening
useEffect(() => {
return () => {
// Only cleanup if we're actually listening (not for file transcription)
if (ws.current || audioSub.current) {
closeEverything();
}
};
// eslint-disable-next-line react-hooks/exhaustive-deps
}, []);
return {
startListening,
stopListening,
transcribeFile,
...(trackState ? {
state: internalState
} : {}),
...(trackTranscript ? {
transcript: internalTranscript,
interimTranscript: internalInterimTranscript
} : {})
};
}
//# sourceMappingURL=useDeepgramSpeechToText.js.map