react-native-deepgram
React Native SDK for Deepgram's AI-powered speech-to-text, real-time transcription, and text intelligence APIs. Supports live audio streaming, file transcription, sentiment analysis, and topic detection for iOS and Android.
import { useRef, useCallback } from 'react';
import { NativeEventEmitter, NativeModules, Platform } from 'react-native';
import { Deepgram } from './NativeDeepgram';
import { askMicPermission } from './helpers/askMicPermission';
import type {
UseDeepgramSpeechToTextProps,
UseDeepgramSpeechToTextReturn,
} from './types';
import { DEEPGRAM_BASEURL, DEEPGRAM_BASEWSS } from './constants';
import { buildParams } from './helpers';
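
/**
 * React hook exposing Deepgram speech-to-text in two flavours:
 * `startListening`/`stopListening` stream microphone PCM to the live
 * WebSocket endpoint, while `transcribeFile` posts pre-recorded audio to
 * the batch endpoint. All callbacks are optional and default to no-ops.
 */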
export function useDeepgramSpeechToText({
onBeforeStart = () => {},
onStart = () => {},
onTranscript = () => {},
onError = () => {},
onEnd = () => {},
onBeforeTranscribe = () => {},
onTranscribeSuccess = () => {},
onTranscribeError = () => {},
}: UseDeepgramSpeechToTextProps): UseDeepgramSpeechToTextReturn {
const ws = useRef<WebSocket | null>(null);
const audioSub = useRef<ReturnType<NativeEventEmitter['addListener']> | null>(
null
);
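
  // Single cleanup path: drop the native audio subscription, stop the native
  // recorder, and close the socket. It only touches refs, so the memoized
  // callbacks below can use it without listing it as a dependency.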
  const closeEverything = () => {
    audioSub.current?.remove();
    audioSub.current = null;
    Deepgram.stopRecording().catch(() => {});
    if (ws.current) {
      // Detach onclose first so cleanup does not fire onEnd a second time.
      ws.current.onclose = null;
      ws.current.close(1000, 'cleanup');
      ws.current = null;
    }
  };
const startListening = useCallback(async () => {
try {
onBeforeStart();
const granted = await askMicPermission();
if (!granted) throw new Error('Microphone permission denied');
await Deepgram.startRecording();
const apiKey = (globalThis as any).__DEEPGRAM_API_KEY__;
if (!apiKey) throw new Error('Deepgram API key missing');
const params = buildParams({
encoding: 'linear16',
sample_rate: '16000',
});
const url = `${DEEPGRAM_BASEWSS}/listen?${params}`;
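      // React Native's WebSocket accepts a non-standard third `options`
      // argument carrying headers; the cast silences lib.dom's two-argument
      // typings.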
ws.current = new (WebSocket as any)(url, undefined, {
headers: { Authorization: `Token ${apiKey}` },
});
ws.current.onopen = () => onStart();
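      // The native recorder emits PCM chunks under platform-specific event
      // names; both shapes are normalized to 16-bit little-endian PCM below.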
const emitter = new NativeEventEmitter(NativeModules.Deepgram);
audioSub.current = emitter.addListener(
Platform.select({
ios: 'DeepgramAudioPCM',
android: 'AudioChunk',
}) as string,
(ev: any) => {
let chunk: ArrayBuffer | undefined;
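          // iOS: base64 → Float32 samples; keep every 3rd sample (a naive
          // 3:1 decimation, presumably 48 kHz down to the 16 kHz advertised
          // above), then scale each sample to signed 16-bit.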
if (typeof ev?.b64 === 'string') {
const floatBytes = Uint8Array.from(atob(ev.b64), (c) =>
c.charCodeAt(0)
);
const float32 = new Float32Array(floatBytes.buffer);
const downsampled = float32.filter((_, i) => i % 3 === 0);
const int16 = new Int16Array(downsampled.length);
for (let i = 0; i < downsampled.length; i++) {
const s = Math.max(-1, Math.min(1, downsampled[i]));
int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
}
chunk = int16.buffer;
} else if (Array.isArray(ev?.data)) {
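            // Android: ev.data is an array of signed Java bytes. Re-wrap as
            // unsigned bytes and read them as little-endian Int16 PCM.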
const bytes = new Uint8Array(ev.data.length);
for (let i = 0; i < ev.data.length; i++) {
const v = ev.data[i];
bytes[i] = v < 0 ? v + 256 : v;
}
const view = new DataView(bytes.buffer);
const int16 = new Int16Array(bytes.length / 2);
for (let i = 0; i < int16.length; i++) {
int16[i] = view.getInt16(i * 2, true);
}
chunk = int16.buffer;
}
if (chunk && ws.current?.readyState === WebSocket.OPEN) {
ws.current.send(chunk);
}
}
);
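      // Deepgram streams JSON "Results" messages; surface any non-empty
      // transcript from the first alternative.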
ws.current.onmessage = (ev) => {
if (typeof ev.data === 'string') {
try {
const msg = JSON.parse(ev.data);
const transcript = msg.channel?.alternatives?.[0]?.transcript;
if (transcript) onTranscript(transcript);
} catch {
// non-JSON or unexpected format
}
}
};
ws.current.onerror = onError;
ws.current.onclose = () => {
onEnd();
closeEverything();
};
} catch (err) {
onError(err);
closeEverything();
}
}, [onBeforeStart, onStart, onTranscript, onError, onEnd]);
const stopListening = useCallback(() => {
try {
closeEverything();
onEnd();
} catch (err) {
onError(err);
}
}, [onEnd, onError]);
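
  // Upload a pre-recorded file to the batch /listen endpoint and surface the
  // first transcript through onTranscribeSuccess.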
const transcribeFile = useCallback(
async (file: Blob | { uri: string; name?: string; type?: string }) => {
onBeforeTranscribe();
try {
const apiKey = (globalThis as any).__DEEPGRAM_API_KEY__;
if (!apiKey) throw new Error('Deepgram API key missing');
const formData = new FormData();
if (file instanceof Blob) {
formData.append('audio', file, 'recording.wav');
} else {
formData.append('audio', {
uri: file.uri,
name: file.name || 'recording.wav',
type: file.type || 'audio/wav',
} as any);
}
const res = await fetch(`${DEEPGRAM_BASEURL}/listen`, {
method: 'POST',
headers: {
Authorization: `Token ${apiKey}`,
},
body: formData,
});
if (!res.ok) {
const errBody = await res.text();
throw new Error(`HTTP ${res.status}: ${errBody}`);
}
const json = await res.json();
        const transcript =
          json.results?.channels?.[0]?.alternatives?.[0]?.transcript;
        // An empty string is a valid transcript (e.g. silent audio); only a
        // missing field counts as an error.
        if (typeof transcript === 'string') {
          onTranscribeSuccess(transcript);
        } else {
          throw new Error('No transcript present in Deepgram response');
        }
} catch (err) {
onTranscribeError(err);
}
},
[onBeforeTranscribe, onTranscribeSuccess, onTranscribeError]
);
return { startListening, stopListening, transcribeFile };
}
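
For context, here is a minimal usage sketch. The component, the button wiring, and the assumption that the hook is re-exported from the package root (and that the callback props are optional, as the defaults above suggest) are illustrative, not part of this file:

import React, { useState } from 'react';
import { Button, Text, View } from 'react-native';
import { useDeepgramSpeechToText } from 'react-native-deepgram';

export function LiveCaption() {
  const [caption, setCaption] = useState('');
  const { startListening, stopListening } = useDeepgramSpeechToText({
    onTranscript: setCaption, // latest transcript wins
    onError: (err) => console.warn('Deepgram error', err),
  });

  return (
    <View>
      <Button title="Start" onPress={() => startListening()} />
      <Button title="Stop" onPress={stopListening} />
      <Text>{caption}</Text>
    </View>
  );
}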