voice-stream
Version:
A powerful React hook for real-time voice streaming, designed for AI-powered applications. Perfect for real-time transcription, voice assistants, and audio processing with features like silence detection and configurable audio processing.
170 lines (169 loc) • 9.26 kB
JavaScript
/**
 * Down-level stand-in for an `async` function: runs a generator to completion,
 * treating every yielded value as something to wait for, and returns a promise
 * for the generator's return value.
 *
 * @param {*} thisArg - `this` binding used when invoking the generator function.
 * @param {ArrayLike<*>|undefined} _arguments - Arguments forwarded to the generator function.
 * @param {Function|undefined} P - Promise constructor to use; the global `Promise` when falsy.
 * @param {Function} generator - Generator function (or compatible factory) to drive.
 * @returns {Promise<*>} Settles with the generator's return value, or rejects with
 *     whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Fall back to the global Promise when the caller did not supply a constructor.
    if (!P) {
        P = Promise;
    }
    // Yielded values that are not already instances of P are wrapped so they can be chained.
    var toPromise = function (candidate) {
        if (candidate instanceof P) {
            return candidate;
        }
        return new P(function (settle) { settle(candidate); });
    };
    return new P(function (resolve, reject) {
        // Either finish with the generator's return value or wait on what it yielded.
        var settleOrChain = function (result) {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        // Resume the generator through `next` or `throw`; anything thrown while
        // resuming (or while chaining the next wait) rejects the outer promise.
        var resume = function (method, input) {
            try {
                settleOrChain(generator[method](input));
            }
            catch (failure) {
                reject(failure);
            }
        };
        var onFulfilled = function (value) { resume("next", value); };
        var onRejected = function (reason) { resume("throw", reason); };
        // From here on `generator` refers to the iterator, not the factory.
        generator = generator.apply(thisArg, _arguments || []);
        settleOrChain(generator.next());
    });
};
// Down-level generator runtime: builds an iterator-like object `g` exposing
// next/throw/return (and Symbol.iterator when available) that drives `body` as a
// label-based state machine. `body` is called with the state object `_` and returns
// an instruction pair [opcode, operand] that `step` interprets.
// NOTE(review): this matches the shape of the helper the TypeScript compiler emits
// for ES5 targets — treat it as generated code and avoid hand-editing.
//
// State object `_`:
//   label - the case label `body` should resume at
//   sent  - returns the value passed to the last resume, or throws it when the
//           pending operation has its low bit set (see `t[0] & 1`)
//   trys  - stack of active try regions; entries are read as t[0]..t[3] below
//           (presumably [tryStart, catchLabel, finallyLabel, endLabel] — confirm)
//   ops   - stack of operations deferred while jumping to t[2]
// Locals: f = "currently executing" flag, y = iterator currently delegated to,
//         t = scratch / pending operation.
var __generator = (this && this.__generator) || function (thisArg, body) {
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === "function" ? Iterator : Object).prototype);
// Opcodes 0, 1 and 2 are what the public next/throw/return methods feed into step.
return g.next = verb(0), g["throw"] = verb(1), g["return"] = verb(2), typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
// Creates the public method for opcode n: forwards the caller's value to step.
function verb(n) { return function (v) { return step([n, v]); }; }
// Interprets one resume request and runs `body` until it yields, returns or throws.
function step(op) {
// Re-entrancy guard: f is set while a step is in progress.
if (f) throw new TypeError("Generator is already executing.");
// `g` is only truthy on the very first step; if that first request is a throw/return
// (op[0] non-zero) the state is cleared so the loop body never runs.
// The loop continues for as long as the state object `_` is still set.
while (g && (g = 0, op[0] && (_ = 0)), _) try {
// When delegating to `y`, forward the request to its return/throw/next method;
// if `y` is not finished, hand its result straight back to the caller.
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
// Delegation finished (or was not active): clear `y` and continue with its final value.
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
// next / throw: remember the operation so `_.sent()` can return or throw its value.
case 0: case 1: t = op; break;
// 4: advance the label and return op[1] to the caller as a not-done result.
case 4: _.label++; return { value: op[1], done: false };
// 5: advance the label, start delegating to op[1], and restart as a plain `next`.
case 5: _.label++; y = op[1]; op = [0]; continue;
// 7: resume the operation that was deferred on `_.ops` and leave the try region.
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// Remaining opcodes (2, 3, 6) are resolved against the innermost try region `t`.
// No active try region and the op is a throw (6) or return (2): finish the generator.
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
// 3 with a target that stays inside the current region (or no region): plain jump.
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
// Throw raised before label t[1]: jump there and expose the error through `_.sent()`.
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
// Otherwise, if label t[2] has not been reached yet, defer this op and run from t[2].
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
// Region fully handled: drop the deferred op (if the region had a t[2]) and pop it.
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
// Run the next segment of the compiled function body; it returns the next instruction.
op = body.call(thisArg, _);
// Anything thrown is turned into opcode 6 so the try-region logic above can route it.
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
// Loop exited: opcodes with bit 1 or bit 4 set (e.g. 1 and 6) rethrow their operand;
// anything else completes the iteration with op[1] (or undefined for opcode 0).
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
import { useRef, useCallback, useEffect, useState } from "react";
import { downsampleBuffer } from "../utils/downsample-buffer";
import { int16ArrayToBase64 } from "../utils/int16array-to-base64";
import { SilenceDetector } from "../utils/silence-detection";
import { DEFAULT_TARGET_SAMPLE_RATE, DEFAULT_BUFFER_SIZE, DEFAULT_SILENCE_THRESHOLD, DEFAULT_SILENCE_DURATION, DEFAULT_INCLUDE_DESTINATION, } from "../constants/voice-stream.constants";
/**
 * React hook that captures microphone audio and hands it to the caller in chunks.
 *
 * Each audio-process event is optionally fed to a SilenceDetector, downsampled with
 * `downsampleBuffer` from the AudioContext's sample rate to `targetSampleRate`,
 * encoded with `int16ArrayToBase64`, and passed to `onAudioChunked`.
 *
 * Lifecycle is tracked in refs rather than in render-time state so that:
 *  - `stopStreaming` never acts on a stale `isStreaming` value (the silence detector
 *    keeps the callback it was constructed with, so a state-based guard would make
 *    auto-stop a permanent no-op);
 *  - `stopStreaming` keeps a stable identity, so the unmount cleanup does not tear the
 *    stream down just because a callback prop changed between renders;
 *  - a start that is still waiting on `getUserMedia` can be cancelled by stop/unmount
 *    and cannot be doubled by a second `startStreaming()` call;
 *  - a failure while building the audio graph releases the microphone again.
 *
 * @param {Object} options
 * @param {Function} [options.onStartStreaming] - Called once the audio graph is connected.
 * @param {Function} [options.onStopStreaming] - Called when an active stream is stopped.
 * @param {Function} [options.onAudioChunked] - Called with the encoded chunk for each audio-process event.
 * @param {Function} [options.onError] - Called with an Error for start-up or processing failures.
 * @param {number} [options.targetSampleRate=DEFAULT_TARGET_SAMPLE_RATE] - Passed to `downsampleBuffer`.
 * @param {number} [options.bufferSize=DEFAULT_BUFFER_SIZE] - Passed to `createScriptProcessor`.
 * @param {boolean} [options.enableSilenceDetection=false] - Create a SilenceDetector and feed it every buffer.
 * @param {number} [options.silenceThreshold=DEFAULT_SILENCE_THRESHOLD] - Passed to the SilenceDetector.
 * @param {number} [options.silenceDuration=DEFAULT_SILENCE_DURATION] - Passed to the SilenceDetector.
 * @param {boolean} [options.autoStopOnSilence=false] - Give the SilenceDetector `stopStreaming` as its callback.
 * @param {boolean} [options.includeDestination=DEFAULT_INCLUDE_DESTINATION] - Connect the processor to `audioContext.destination`.
 * @returns {{startStreaming: function(): Promise<void>, stopStreaming: function(): void, isStreaming: boolean}}
 */
export var useVoiceStream = function (options) {
    var onStartStreaming = options.onStartStreaming, onStopStreaming = options.onStopStreaming, onAudioChunked = options.onAudioChunked, onError = options.onError;
    var _a = options.targetSampleRate, targetSampleRate = _a === void 0 ? DEFAULT_TARGET_SAMPLE_RATE : _a;
    var _b = options.bufferSize, bufferSize = _b === void 0 ? DEFAULT_BUFFER_SIZE : _b;
    var _c = options.enableSilenceDetection, enableSilenceDetection = _c === void 0 ? false : _c;
    var _d = options.silenceThreshold, silenceThreshold = _d === void 0 ? DEFAULT_SILENCE_THRESHOLD : _d;
    var _e = options.silenceDuration, silenceDuration = _e === void 0 ? DEFAULT_SILENCE_DURATION : _e;
    var _f = options.autoStopOnSilence, autoStopOnSilence = _f === void 0 ? false : _f;
    var _g = options.includeDestination, includeDestination = _g === void 0 ? DEFAULT_INCLUDE_DESTINATION : _g;
    var _h = useState(false), isStreaming = _h[0], setIsStreaming = _h[1];
    var audioContextRef = useRef(null);
    var mediaStreamRef = useRef(null);
    var scriptProcessorRef = useRef(null);
    var sourceNodeRef = useRef(null);
    var silenceDetectorRef = useRef(null);
    // Synchronous lifecycle status: "idle" | "starting" | "streaming".
    // `isStreaming` state only mirrors this for rendering.
    var statusRef = useRef("idle");
    // Bumped on every stop; an in-flight start compares against its snapshot to
    // learn that it was cancelled while waiting for the microphone.
    var sessionIdRef = useRef(0);
    // Latest onStopStreaming, so stopStreaming can stay referentially stable.
    var onStopStreamingRef = useRef(onStopStreaming);
    useEffect(function () {
        onStopStreamingRef.current = onStopStreaming;
    }, [onStopStreaming]);
    var handleError = useCallback(function (error) {
        console.error("Voice stream error:", error);
        if (onError) {
            onError(error);
        }
    }, [onError]);
    // Tears down whatever part of the audio graph currently exists and clears the refs.
    // Safe to call on a partially built graph.
    var releaseResources = useCallback(function () {
        if (scriptProcessorRef.current) {
            scriptProcessorRef.current.disconnect();
            scriptProcessorRef.current.onaudioprocess = null;
            scriptProcessorRef.current = null;
        }
        if (sourceNodeRef.current) {
            sourceNodeRef.current.disconnect();
            sourceNodeRef.current = null;
        }
        if (audioContextRef.current) {
            audioContextRef.current.close();
            audioContextRef.current = null;
        }
        if (mediaStreamRef.current) {
            mediaStreamRef.current.getTracks().forEach(function (track) { return track.stop(); });
            mediaStreamRef.current = null;
        }
        if (silenceDetectorRef.current) {
            silenceDetectorRef.current.reset();
            silenceDetectorRef.current = null;
        }
    }, []);
    var stopStreaming = useCallback(function () {
        var previousStatus = statusRef.current;
        if (previousStatus === "idle") {
            return;
        }
        statusRef.current = "idle";
        // Invalidate any start that is still waiting for getUserMedia.
        sessionIdRef.current += 1;
        releaseResources();
        if (previousStatus !== "streaming") {
            // The start never completed, so onStartStreaming was never announced;
            // do not announce a stop either.
            return;
        }
        setIsStreaming(false);
        if (onStopStreamingRef.current) {
            onStopStreamingRef.current();
        }
    }, [releaseResources]);
    var startStreaming = useCallback(function () {
        if (statusRef.current !== "idle") {
            return Promise.resolve();
        }
        statusRef.current = "starting";
        var sessionId = sessionIdRef.current;
        // The executor runs synchronously, so getUserMedia is requested during this call,
        // and a synchronous throw (e.g. navigator.mediaDevices missing) becomes a rejection.
        return new Promise(function (resolve) {
            resolve(navigator.mediaDevices.getUserMedia({ audio: true }));
        }).then(function (stream) {
            if (sessionIdRef.current !== sessionId) {
                // Stopped or unmounted while the request was pending: release the
                // microphone immediately instead of leaking it.
                stream.getTracks().forEach(function (track) { return track.stop(); });
                return;
            }
            mediaStreamRef.current = stream;
            var audioContext = new window.AudioContext();
            audioContextRef.current = audioContext;
            var sampleRate = audioContext.sampleRate;
            var sourceNode = audioContext.createMediaStreamSource(stream);
            sourceNodeRef.current = sourceNode;
            var scriptProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);
            scriptProcessorRef.current = scriptProcessor;
            // Initialize silence detector if enabled
            if (enableSilenceDetection) {
                silenceDetectorRef.current = new SilenceDetector(silenceThreshold, silenceDuration, autoStopOnSilence ? stopStreaming : undefined);
            }
            scriptProcessor.onaudioprocess = function (audioProcessingEvent) {
                try {
                    var channelData = audioProcessingEvent.inputBuffer.getChannelData(0);
                    // Process silence detection if enabled
                    if (enableSilenceDetection && silenceDetectorRef.current) {
                        silenceDetectorRef.current.processAudioData(channelData);
                    }
                    var downsampledBuffer = downsampleBuffer(channelData, sampleRate, targetSampleRate);
                    var base64Data = int16ArrayToBase64(downsampledBuffer);
                    if (onAudioChunked) {
                        onAudioChunked(base64Data);
                    }
                }
                catch (error) {
                    handleError(error instanceof Error ? error : new Error(String(error)));
                }
            };
            sourceNode.connect(scriptProcessor);
            if (includeDestination) {
                scriptProcessor.connect(audioContext.destination);
            }
            statusRef.current = "streaming";
            setIsStreaming(true);
            if (onStartStreaming) {
                onStartStreaming();
            }
        }).catch(function (error) {
            // Only roll back when this session failed before it went live. An error thrown
            // by onStartStreaming itself is reported but leaves the stream running.
            if (sessionIdRef.current === sessionId && statusRef.current === "starting") {
                releaseResources();
                statusRef.current = "idle";
            }
            handleError(error instanceof Error ? error : new Error(String(error)));
        });
    }, [
        onStartStreaming,
        onAudioChunked,
        targetSampleRate,
        bufferSize,
        enableSilenceDetection,
        silenceThreshold,
        silenceDuration,
        autoStopOnSilence,
        includeDestination,
        stopStreaming,
        releaseResources,
        handleError,
    ]);
    // stopStreaming is referentially stable, so this cleanup only runs on unmount.
    useEffect(function () {
        return function () {
            stopStreaming();
        };
    }, [stopStreaming]);
    return {
        startStreaming: startStreaming,
        stopStreaming: stopStreaming,
        isStreaming: isStreaming,
    };
};