@mazka/react-speech-to-text
Version:
A powerful, TypeScript-first React hook for speech recognition using the Web Speech API. This library provides a simple yet comprehensive interface for converting speech to text in React applications.
489 lines (486 loc) • 16.2 kB
JavaScript
;
// --- esbuild-generated CommonJS interop helpers (do not edit) ---
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines each entry of `all` on `target` as an enumerable lazy getter,
// so exports are evaluated on first access rather than at module load.
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// Copies own properties of `from` onto `to` (skipping `except` and keys that
// already exist on `to`) as getters, preserving each source property's
// enumerability via its original descriptor.
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
// Wraps an ESM-style exports object as a CommonJS module: marks it with
// `__esModule: true` and mirrors all named exports onto the result.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// src/index.ts
// Public module surface: only `useSpeechToText` is exported. The lazy-getter
// registration (via __export) plus __toCommonJS makes the named export visible
// to both CommonJS `require` and Node's ESM named-import detection.
var index_exports = {};
__export(index_exports, {
useSpeechToText: () => useSpeechToText
});
module.exports = __toCommonJS(index_exports);
// src/use-speech-to-text.ts
var import_react = require("react");
/**
 * React hook wrapping the browser Web Speech API (SpeechRecognition) for
 * speech-to-text.
 *
 * @param {object} [initialOptions] - Overrides merged over the defaults:
 *   `continuous`, `interimResults`, `maxAlternatives`, `language`, and
 *   `autoStopOnSilence` ({ enabled, silenceDuration, onAutoStop }).
 * @returns {object} Recognition state (isListening, isSupported, transcript,
 *   interimTranscript, finalTranscript, results, error, isInitializing,
 *   isAutoStopping, isPaused, lastSpeechTimestamp) spread together with the
 *   action callbacks (startListening, stopListening, abortListening,
 *   pauseListening, resumeListening, resetTranscript, clearError).
 */
var useSpeechToText = (initialOptions) => {
  const [state, setState] = (0, import_react.useState)({
    isListening: false,
    isSupported: false,
    transcript: "",
    interimTranscript: "",
    finalTranscript: "",
    results: [],
    error: null,
    isInitializing: true,
    isAutoStopping: false,
    isPaused: false,
    lastSpeechTimestamp: null
  });
  const DEFAULT_OPTIONS = {
    continuous: true,
    interimResults: true,
    maxAlternatives: 1,
    language: "en-US",
    autoStopOnSilence: {
      enabled: false,
      silenceDuration: 3e3
      // Default to 3 seconds
    }
  };
  // Refs hold the live recognition instance and mutable bookkeeping so the
  // recognition event handlers always see current values without having to be
  // re-attached on every render.
  const recognitionRef = (0, import_react.useRef)(null);
  const lastSpeechTimeRef = (0, import_react.useRef)(null);
  const silenceTimeoutRef = (0, import_react.useRef)(null);
  const transcriptRef = (0, import_react.useRef)({
    final: "",
    interim: ""
  });
  // Options live in a ref (not state) so updating them via startListening()
  // never tears down the recognition instance. Only the first render's
  // initialOptions are captured (useRef keeps its initial value).
  const optionsRef = (0, import_react.useRef)({
    ...DEFAULT_OPTIONS,
    ...initialOptions
  });
  // Resolve the (possibly vendor-prefixed) SpeechRecognition constructor, or
  // null when unavailable / not in a browser (SSR-safe).
  const getBrowserSpeechRecognition = (0, import_react.useCallback)(() => {
    if (typeof window === "undefined") return null;
    return window.SpeechRecognition || window.webkitSpeechRecognition || window.mozSpeechRecognition || window.msSpeechRecognition || null;
  }, []);
  // User-agent sniffing to produce a friendly { isSupported, browserName,
  // reason } triple. Brave and Firefox are reported unsupported up front;
  // everything else defers to whether the constructor actually exists.
  const checkBrowserCompatibility = (0, import_react.useCallback)(() => {
    if (typeof window === "undefined") {
      return {
        isSupported: false,
        browserName: "Unknown",
        reason: "Not running in browser environment"
      };
    }
    const userAgent = navigator.userAgent.toLowerCase();
    // Brave deliberately exposes navigator.brave; its UA otherwise looks like Chrome.
    const isBrave = "brave" in navigator && navigator.brave !== void 0;
    const isChrome = userAgent.includes("chrome") && !userAgent.includes("edg") && !isBrave;
    const isEdge = userAgent.includes("edg");
    const isSafari = userAgent.includes("safari") && !userAgent.includes("chrome");
    const isFirefox = userAgent.includes("firefox");
    const isOpera = userAgent.includes("opr") || userAgent.includes("opera");
    const speechRecognitionExists = getBrowserSpeechRecognition() !== null;
    if (isBrave) {
      return {
        isSupported: false,
        browserName: "Brave",
        reason: "Brave Browser does not support Web Speech API for privacy reasons"
      };
    }
    if (isFirefox) {
      return {
        isSupported: false,
        browserName: "Firefox",
        reason: "Firefox does not support Web Speech API"
      };
    }
    if (isOpera) {
      return {
        isSupported: speechRecognitionExists,
        browserName: "Opera",
        reason: speechRecognitionExists ? "" : "Opera has limited Web Speech API support"
      };
    }
    if (isChrome) {
      return {
        isSupported: speechRecognitionExists,
        browserName: "Chrome",
        reason: speechRecognitionExists ? "" : "Chrome should support Web Speech API but it's not available"
      };
    }
    if (isEdge) {
      return {
        isSupported: speechRecognitionExists,
        browserName: "Edge",
        reason: speechRecognitionExists ? "" : "Edge should support Web Speech API but it's not available"
      };
    }
    if (isSafari) {
      const isMobile = /iPhone|iPad|iPod/i.test(navigator.userAgent);
      const isMac = /(Macintosh|Mac OS)/.test(navigator.userAgent);
      if (isMobile) {
        // Web Speech API landed on iOS Safari in 14.5; parse "OS 14_5" from the UA.
        const iosVersion = navigator.userAgent.match(/OS (\d+)_(\d+)/);
        if (iosVersion) {
          const majorVersion = Number.parseInt(iosVersion[1], 10);
          const minorVersion = Number.parseInt(iosVersion[2], 10);
          const isSupported = majorVersion > 14 || majorVersion === 14 && minorVersion >= 5;
          return {
            isSupported: isSupported && speechRecognitionExists,
            browserName: "Safari (iOS)",
            reason: !isSupported ? "Web Speech API requires iOS 14.5 or later" : !speechRecognitionExists ? "Web Speech API is not available" : ""
          };
        }
      }
      if (isMac) {
        return {
          isSupported: speechRecognitionExists,
          browserName: "Safari (macOS)",
          reason: speechRecognitionExists ? "" : "Web Speech API requires Safari 14.1 or later"
        };
      }
      return {
        isSupported: speechRecognitionExists,
        browserName: "Safari",
        reason: speechRecognitionExists ? "" : "Web Speech API may not be supported on this Safari version"
      };
    }
    return {
      isSupported: speechRecognitionExists,
      browserName: "Unknown Browser",
      reason: speechRecognitionExists ? "" : "This browser does not support Web Speech API"
    };
  }, [getBrowserSpeechRecognition]);
  // (Re)arm the silence watchdog: cancel any pending timer and, when
  // autoStopOnSilence is enabled, schedule a stop after silenceDuration ms.
  // Called on start, on every result, and on resume.
  const resetSilenceTimeout = (0, import_react.useCallback)(() => {
    if (silenceTimeoutRef.current) {
      clearTimeout(silenceTimeoutRef.current);
      silenceTimeoutRef.current = null;
    }
    if (optionsRef.current.autoStopOnSilence?.enabled) {
      silenceTimeoutRef.current = window.setTimeout(() => {
        if (recognitionRef.current) {
          recognitionRef.current.stop();
        }
        // onAutoStop only fires when some final transcript was captured.
        if (optionsRef.current.autoStopOnSilence?.onAutoStop && transcriptRef.current.final) {
          optionsRef.current.autoStopOnSilence.onAutoStop(
            transcriptRef.current.final
          );
        }
      }, optionsRef.current.autoStopOnSilence.silenceDuration);
    }
  }, []);
  // Build and configure a SpeechRecognition instance with all event handlers
  // attached. Returns the instance, or null (after recording an error in
  // state) when the browser is unsupported or construction throws.
  const initializeSpeechRecognition = (0, import_react.useCallback)(() => {
    const browserCheck = checkBrowserCompatibility();
    if (!browserCheck.isSupported) {
      setState((prev) => ({
        ...prev,
        isSupported: false,
        isInitializing: false,
        error: {
          code: "NOT_SUPPORTED",
          message: browserCheck.reason,
          name: "NotSupportedError",
          browserInfo: {
            browserName: browserCheck.browserName,
            reason: browserCheck.reason
          }
        }
      }));
      return null;
    }
    const SpeechRecognitionConstructor = getBrowserSpeechRecognition();
    if (!SpeechRecognitionConstructor) {
      setState((prev) => ({
        ...prev,
        isSupported: false,
        isInitializing: false,
        error: {
          code: "NOT_SUPPORTED",
          message: "Speech recognition is not supported in this browser",
          name: "NotSupportedError",
          browserInfo: {
            browserName: browserCheck.browserName,
            reason: "Web Speech API constructor not found"
          }
        }
      }));
      return null;
    }
    try {
      const recognition = new SpeechRecognitionConstructor();
      recognition.continuous = optionsRef.current.continuous ?? true;
      recognition.interimResults = optionsRef.current.interimResults ?? true;
      recognition.maxAlternatives = optionsRef.current.maxAlternatives ?? 1;
      recognition.lang = optionsRef.current.language ?? "en-US";
      recognition.onstart = () => {
        lastSpeechTimeRef.current = /* @__PURE__ */ new Date();
        setState((prev) => ({
          ...prev,
          isListening: true,
          error: null,
          lastSpeechTimestamp: /* @__PURE__ */ new Date()
        }));
        resetSilenceTimeout();
      };
      recognition.onresult = (event) => {
        let interimTranscript = "";
        let finalTranscript = "";
        const results = [];
        // Any result counts as speech activity, so re-arm the silence timer.
        resetSilenceTimeout();
        // resultIndex points at the first result that changed in this event;
        // earlier results were already folded into state.finalTranscript.
        for (let i = event.resultIndex; i < event.results.length; i++) {
          const result = event.results[i];
          const transcript = result[0].transcript;
          const confidence = result[0].confidence;
          results.push({
            transcript,
            confidence,
            isFinal: result.isFinal,
            timestamp: /* @__PURE__ */ new Date()
          });
          if (result.isFinal) {
            finalTranscript += transcript;
          } else {
            interimTranscript += transcript;
          }
        }
        setState((prev) => {
          // Mirror transcripts into a ref so the silence-timeout callback can
          // read them without a stale closure over state.
          transcriptRef.current = {
            final: prev.finalTranscript + finalTranscript,
            interim: interimTranscript
          };
          return {
            ...prev,
            transcript: finalTranscript + interimTranscript,
            interimTranscript,
            finalTranscript: prev.finalTranscript + finalTranscript,
            results: [...prev.results, ...results]
          };
        });
      };
      recognition.onerror = (event) => {
        let errorMessage = "An unknown error occurred during speech recognition";
        switch (event.error) {
          case "no-speech":
            errorMessage = "No speech was detected";
            break;
          case "audio-capture":
            errorMessage = "Audio capture failed";
            break;
          case "not-allowed":
            errorMessage = "Permission to use microphone was denied";
            break;
          case "network":
            errorMessage = "Network error occurred";
            break;
          case "service-not-allowed":
            errorMessage = "Speech recognition service is not allowed";
            break;
          case "bad-grammar":
            errorMessage = "Grammar compilation failed";
            break;
          case "language-not-supported":
            errorMessage = "Language is not supported";
            break;
          case "aborted":
            errorMessage = "Speech recognition was aborted";
            break;
        }
        setState((prev) => ({
          ...prev,
          isListening: false,
          error: {
            // Global regex is required: String.replace with a string pattern
            // only replaces the FIRST hyphen, so multi-hyphen codes like
            // "language-not-supported" would yield "LANGUAGE_NOT-SUPPORTED".
            code: event.error.toUpperCase().replace(/-/g, "_"),
            message: errorMessage,
            name: "SpeechRecognitionError"
          }
        }));
      };
      recognition.onend = () => {
        // Recognition ended (stop(), abort(), error, or engine timeout):
        // cancel the silence watchdog and clear the listening flag.
        if (silenceTimeoutRef.current) {
          clearTimeout(silenceTimeoutRef.current);
          silenceTimeoutRef.current = null;
        }
        setState((prev) => ({
          ...prev,
          isListening: false
        }));
      };
      setState((prev) => ({
        ...prev,
        isSupported: true,
        isInitializing: false
      }));
      return recognition;
    } catch (error) {
      setState((prev) => ({
        ...prev,
        isSupported: false,
        isInitializing: false,
        error: {
          code: "INITIALIZATION_ERROR",
          message: error instanceof Error ? error.message : "Failed to initialize speech recognition",
          name: "InitializationError"
        }
      }));
      return null;
    }
  }, [
    getBrowserSpeechRecognition,
    checkBrowserCompatibility,
    resetSilenceTimeout
  ]);
  // Start (or no-op if already listening) recognition, optionally merging new
  // options first. NOTE(review): the merge is shallow, so passing a partial
  // autoStopOnSilence object replaces the whole nested object.
  const startListening = (0, import_react.useCallback)(
    (options) => {
      if (!state.isSupported) {
        const browserCheck = checkBrowserCompatibility();
        setState((prev) => ({
          ...prev,
          error: {
            code: "NOT_SUPPORTED",
            message: "Speech recognition is not supported",
            name: "NotSupportedError",
            browserInfo: {
              browserName: browserCheck.browserName,
              reason: browserCheck.reason
            }
          }
        }));
        return;
      }
      if (state.isListening) {
        return;
      }
      if (options) {
        optionsRef.current = { ...optionsRef.current, ...options };
      }
      const autoStopEnabled = optionsRef.current.autoStopOnSilence?.enabled === true;
      setState((prev) => ({
        ...prev,
        isAutoStopping: autoStopEnabled
      }));
      if (!recognitionRef.current) {
        recognitionRef.current = initializeSpeechRecognition();
      }
      if (!recognitionRef.current) {
        return;
      }
      try {
        // Re-apply options on every start so per-call overrides take effect
        // on the existing recognition instance.
        recognitionRef.current.continuous = optionsRef.current.continuous ?? true;
        recognitionRef.current.interimResults = optionsRef.current.interimResults ?? true;
        recognitionRef.current.maxAlternatives = optionsRef.current.maxAlternatives ?? 1;
        recognitionRef.current.lang = optionsRef.current.language ?? "en-US";
        recognitionRef.current.start();
      } catch (error) {
        // start() throws (InvalidStateError) if recognition is already running.
        setState((prev) => ({
          ...prev,
          error: {
            code: "START_ERROR",
            message: error instanceof Error ? error.message : "Failed to start speech recognition",
            name: "StartError"
          }
        }));
      }
    },
    [
      state.isSupported,
      state.isListening,
      initializeSpeechRecognition,
      checkBrowserCompatibility
    ]
  );
  // Graceful stop: pending audio is still processed and final results are
  // delivered before onend fires.
  const stopListening = (0, import_react.useCallback)(() => {
    if (recognitionRef.current && state.isListening) {
      if (silenceTimeoutRef.current) {
        clearTimeout(silenceTimeoutRef.current);
        silenceTimeoutRef.current = null;
      }
      recognitionRef.current.stop();
    }
  }, [state.isListening]);
  // Hard stop: abort() discards any pending results immediately.
  const abortListening = (0, import_react.useCallback)(() => {
    if (recognitionRef.current) {
      if (silenceTimeoutRef.current) {
        clearTimeout(silenceTimeoutRef.current);
        silenceTimeoutRef.current = null;
      }
      recognitionRef.current.abort();
    }
  }, []);
  // "Pause" = abort recognition but keep accumulated transcripts and mark
  // isPaused so resumeListening can restart the same instance.
  const pauseListening = (0, import_react.useCallback)(() => {
    if (recognitionRef.current && state.isListening) {
      recognitionRef.current.abort();
      setState((prev) => ({
        ...prev,
        isPaused: true,
        isListening: false
      }));
    }
  }, [state.isListening]);
  // Restart recognition after pauseListening; transcripts keep accumulating.
  const resumeListening = (0, import_react.useCallback)(() => {
    if (recognitionRef.current && state.isPaused) {
      recognitionRef.current.start();
      setState((prev) => ({
        ...prev,
        isPaused: false,
        isListening: true
      }));
      resetSilenceTimeout();
      lastSpeechTimeRef.current = /* @__PURE__ */ new Date();
    }
  }, [state.isPaused, resetSilenceTimeout]);
  // Clear all accumulated transcript state (ref and React state).
  const resetTranscript = (0, import_react.useCallback)(() => {
    transcriptRef.current = { final: "", interim: "" };
    setState((prev) => ({
      ...prev,
      transcript: "",
      interimTranscript: "",
      finalTranscript: "",
      results: []
    }));
  }, []);
  const clearError = (0, import_react.useCallback)(() => {
    setState((prev) => ({
      ...prev,
      error: null
    }));
  }, []);
  // Eagerly initialize on mount (deps are all stable useCallbacks, so this
  // effectively runs once); cleanup aborts recognition and cancels the timer.
  (0, import_react.useEffect)(() => {
    recognitionRef.current = initializeSpeechRecognition();
    return () => {
      if (recognitionRef.current) {
        recognitionRef.current.abort();
        recognitionRef.current = null;
      }
      if (silenceTimeoutRef.current) {
        clearTimeout(silenceTimeoutRef.current);
        silenceTimeoutRef.current = null;
      }
    };
  }, [initializeSpeechRecognition]);
  // Belt-and-braces unmount cleanup. NOTE(review): largely redundant with the
  // effect above, but harmless (abort/clearTimeout are idempotent here).
  (0, import_react.useEffect)(() => {
    return () => {
      if (recognitionRef.current) {
        recognitionRef.current.abort();
      }
      if (silenceTimeoutRef.current) {
        clearTimeout(silenceTimeoutRef.current);
        silenceTimeoutRef.current = null;
      }
    };
  }, []);
  return {
    // State
    ...state,
    // Actions
    startListening,
    stopListening,
    abortListening,
    pauseListening,
    resumeListening,
    resetTranscript,
    clearError
  };
};
// Annotate the CommonJS export names for ESM import in node:
// Dead code by construction (`0 && ...` never executes); Node's cjs-module-lexer
// pattern-matches this literal text to expose `useSpeechToText` as a named ESM
// export. Must remain byte-identical for that detection to work.
0 && (module.exports = {
useSpeechToText
});