UNPKG

voice-stream

Version:

A powerful React hook for real-time voice streaming, designed for AI-powered applications. Perfect for real-time transcription, voice assistants, and audio processing with features like silence detection and configurable audio processing.

170 lines (169 loc) 9.26 kB
// --- TypeScript async/await down-level helpers (logic unchanged, only re-indented) ---
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
var __generator = (this && this.__generator) || function (thisArg, body) {
    var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === "function" ? Iterator : Object).prototype);
    return g.next = verb(0), g["throw"] = verb(1), g["return"] = verb(2), typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
    function verb(n) { return function (v) { return step([n, v]); }; }
    function step(op) {
        if (f) throw new TypeError("Generator is already executing.");
        while (g && (g = 0, op[0] && (_ = 0)), _) try {
            if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
            if (y = 0, t) op = [op[0] & 2, t.value];
            switch (op[0]) {
                case 0: case 1: t = op; break;
                case 4: _.label++; return { value: op[1], done: false };
                case 5: _.label++; y = op[1]; op = [0]; continue;
                case 7: op = _.ops.pop(); _.trys.pop(); continue;
                default:
                    if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
                    if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
                    if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
                    if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
                    if (t[2]) _.ops.pop();
                    _.trys.pop(); continue;
            }
            op = body.call(thisArg, _);
        } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
        if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
    }
};
import { useRef, useCallback, useEffect, useState } from "react";
import { downsampleBuffer } from "../utils/downsample-buffer";
import { int16ArrayToBase64 } from "../utils/int16array-to-base64";
import { SilenceDetector } from "../utils/silence-detection";
import { DEFAULT_TARGET_SAMPLE_RATE, DEFAULT_BUFFER_SIZE, DEFAULT_SILENCE_THRESHOLD, DEFAULT_SILENCE_DURATION, DEFAULT_INCLUDE_DESTINATION, } from "../constants/voice-stream.constants";
/**
 * React hook that captures microphone audio and hands it to the caller in chunks.
 *
 * On every `audioprocess` event the first input channel is passed through
 * `downsampleBuffer` (audio-context sample rate -> `targetSampleRate`), encoded
 * with `int16ArrayToBase64`, and delivered to `onAudioChunked`.
 *
 * @param {Object} options
 * @param {Function} [options.onStartStreaming] Called with no arguments once the audio graph is connected.
 * @param {Function} [options.onStopStreaming] Called with no arguments after a running stream has been stopped.
 * @param {Function} [options.onAudioChunked] Receives the value returned by `int16ArrayToBase64` for each chunk.
 * @param {Function} [options.onError] Receives an `Error` for start-up or per-chunk failures (also logged via `console.error`).
 * @param {number} [options.targetSampleRate=DEFAULT_TARGET_SAMPLE_RATE] Third argument given to `downsampleBuffer`.
 * @param {number} [options.bufferSize=DEFAULT_BUFFER_SIZE] Buffer size given to `createScriptProcessor`.
 * @param {boolean} [options.enableSilenceDetection=false] When true a `SilenceDetector` is created and fed every chunk.
 * @param {*} [options.silenceThreshold=DEFAULT_SILENCE_THRESHOLD] First `SilenceDetector` constructor argument.
 * @param {*} [options.silenceDuration=DEFAULT_SILENCE_DURATION] Second `SilenceDetector` constructor argument.
 * @param {boolean} [options.autoStopOnSilence=false] When true `stopStreaming` is passed as the third
 *   `SilenceDetector` constructor argument (presumably invoked once silence is detected - confirm in SilenceDetector).
 * @param {boolean} [options.includeDestination=DEFAULT_INCLUDE_DESTINATION] When true the script processor is
 *   connected to `audioContext.destination`.
 * @returns {{startStreaming: Function, stopStreaming: Function, isStreaming: boolean}}
 *   `startStreaming` returns a Promise that resolves when start-up finished or failed (failures are reported
 *   through `onError`, not by rejection); `stopStreaming` is synchronous.
 */
export var useVoiceStream = function (options) {
    var onStartStreaming = options.onStartStreaming, onStopStreaming = options.onStopStreaming, onAudioChunked = options.onAudioChunked, onError = options.onError, _a = options.targetSampleRate, targetSampleRate = _a === void 0 ? DEFAULT_TARGET_SAMPLE_RATE : _a, _b = options.bufferSize, bufferSize = _b === void 0 ? DEFAULT_BUFFER_SIZE : _b, _c = options.enableSilenceDetection, enableSilenceDetection = _c === void 0 ? false : _c, _d = options.silenceThreshold, silenceThreshold = _d === void 0 ? DEFAULT_SILENCE_THRESHOLD : _d, _e = options.silenceDuration, silenceDuration = _e === void 0 ? DEFAULT_SILENCE_DURATION : _e, _f = options.autoStopOnSilence, autoStopOnSilence = _f === void 0 ? false : _f, _g = options.includeDestination, includeDestination = _g === void 0 ? DEFAULT_INCLUDE_DESTINATION : _g;
    var _h = useState(false), isStreaming = _h[0], setIsStreaming = _h[1];
    var audioContextRef = useRef(null);
    var mediaStreamRef = useRef(null);
    var scriptProcessorRef = useRef(null);
    var sourceNodeRef = useRef(null);
    var silenceDetectorRef = useRef(null);
    // Live flags kept in refs: callbacks created in an earlier render (the one
    // handed to SilenceDetector, the unmount cleanup) must see the current
    // status, which a captured `isStreaming` state value cannot provide.
    var isStreamingRef = useRef(false);
    var isStartingRef = useRef(false);
    var isMountedRef = useRef(true);
    // Latest onStopStreaming, so stopStreaming can keep a stable identity.
    var onStopStreamingRef = useRef(onStopStreaming);
    useEffect(function () {
        onStopStreamingRef.current = onStopStreaming;
    }, [onStopStreaming]);
    var handleError = useCallback(function (error) {
        console.error("Voice stream error:", error);
        if (onError) {
            onError(error);
        }
    }, [onError]);
    // Tears down whatever part of the audio graph currently exists. Safe to call
    // with a partially built graph (e.g. after a failed start).
    var releaseAudioResources = useCallback(function () {
        if (scriptProcessorRef.current) {
            scriptProcessorRef.current.disconnect();
            scriptProcessorRef.current.onaudioprocess = null;
            scriptProcessorRef.current = null;
        }
        if (sourceNodeRef.current) {
            sourceNodeRef.current.disconnect();
            sourceNodeRef.current = null;
        }
        if (audioContextRef.current) {
            audioContextRef.current.close();
            audioContextRef.current = null;
        }
        if (mediaStreamRef.current) {
            mediaStreamRef.current.getTracks().forEach(function (track) { return track.stop(); });
            mediaStreamRef.current = null;
        }
        if (silenceDetectorRef.current) {
            silenceDetectorRef.current.reset();
            silenceDetectorRef.current = null;
        }
    }, []);
    var stopStreaming = useCallback(function () {
        if (!isStreamingRef.current)
            return;
        // Flip the flag first so a re-entrant call becomes a no-op and
        // onStopStreaming fires exactly once per stop.
        isStreamingRef.current = false;
        releaseAudioResources();
        setIsStreaming(false);
        var notifyStopped = onStopStreamingRef.current;
        if (notifyStopped) {
            notifyStopped();
        }
    }, [releaseAudioResources]);
    var startStreaming = useCallback(function () { return __awaiter(void 0, void 0, void 0, function () {
        var stream, audioContext, sampleRate_1, sourceNode, scriptProcessor, error_1;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    // Also reject overlapping starts while getUserMedia is pending.
                    if (isStreamingRef.current || isStartingRef.current)
                        return [2 /*return*/];
                    isStartingRef.current = true;
                    _a.label = 1;
                case 1:
                    _a.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, navigator.mediaDevices.getUserMedia({ audio: true })];
                case 2:
                    stream = _a.sent();
                    mediaStreamRef.current = stream;
                    if (!isMountedRef.current) {
                        // The component unmounted while the permission prompt was
                        // open: release the microphone instead of starting.
                        releaseAudioResources();
                        isStartingRef.current = false;
                        return [2 /*return*/];
                    }
                    audioContext = new window.AudioContext();
                    audioContextRef.current = audioContext;
                    sampleRate_1 = audioContext.sampleRate;
                    sourceNode = audioContext.createMediaStreamSource(stream);
                    sourceNodeRef.current = sourceNode;
                    scriptProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);
                    scriptProcessorRef.current = scriptProcessor;
                    // Initialize silence detector if enabled
                    if (enableSilenceDetection) {
                        silenceDetectorRef.current = new SilenceDetector(silenceThreshold, silenceDuration, autoStopOnSilence ? stopStreaming : undefined);
                    }
                    scriptProcessor.onaudioprocess = function (audioProcessingEvent) {
                        try {
                            var inputBuffer = audioProcessingEvent.inputBuffer;
                            var channelData = inputBuffer.getChannelData(0);
                            // Process silence detection if enabled
                            if (enableSilenceDetection && silenceDetectorRef.current) {
                                silenceDetectorRef.current.processAudioData(channelData);
                            }
                            var downsampledBuffer = downsampleBuffer(channelData, sampleRate_1, targetSampleRate);
                            var base64Data = int16ArrayToBase64(downsampledBuffer);
                            if (onAudioChunked) {
                                onAudioChunked(base64Data);
                            }
                        }
                        catch (error) {
                            handleError(error instanceof Error ? error : new Error(String(error)));
                        }
                    };
                    sourceNode.connect(scriptProcessor);
                    if (includeDestination) {
                        scriptProcessor.connect(audioContext.destination);
                    }
                    isStartingRef.current = false;
                    isStreamingRef.current = true;
                    setIsStreaming(true);
                    if (onStartStreaming) {
                        onStartStreaming();
                    }
                    return [3 /*break*/, 4];
                case 3:
                    error_1 = _a.sent();
                    isStartingRef.current = false;
                    // A failure before the stream was marked as running leaves a
                    // partially built graph (open microphone, AudioContext) that
                    // stopStreaming would ignore, so release it here. A failure
                    // after that point can only come from onStartStreaming; the
                    // stream then keeps running and the error is just reported.
                    if (!isStreamingRef.current) {
                        releaseAudioResources();
                    }
                    handleError(error_1 instanceof Error ? error_1 : new Error(String(error_1)));
                    return [3 /*break*/, 4];
                case 4: return [2 /*return*/];
            }
        });
    }); }, [
        onStartStreaming,
        onAudioChunked,
        targetSampleRate,
        bufferSize,
        enableSilenceDetection,
        silenceThreshold,
        silenceDuration,
        autoStopOnSilence,
        includeDestination,
        stopStreaming,
        handleError,
        releaseAudioResources,
    ]);
    // stopStreaming is referentially stable, so this cleanup only runs on
    // unmount (or an effect re-mount), never on an ordinary re-render.
    useEffect(function () {
        isMountedRef.current = true;
        return function () {
            isMountedRef.current = false;
            stopStreaming();
        };
    }, [stopStreaming]);
    return {
        startStreaming: startStreaming,
        stopStreaming: stopStreaming,
        isStreaming: isStreaming,
    };
};