UNPKG

react-text-to-speech

Version:

An easy-to-use React.js library that leverages the Web Speech API to convert text to speech.

641 lines (633 loc) 26.8 kB
import { __spreadProps, __spreadValues, __objRest, __async } from './chunk-FZ4QVG4I.js';
import React2, { useState, useEffect, useCallback, useRef, useMemo, isValidElement, forwardRef, cloneElement, useLayoutEffect, Fragment } from 'react';

// NOTE(review): __spreadProps / __spreadValues / __objRest / __async come from the shared chunk, which is
// not visible here. They look like the bundler's object-spread / rest / async-function shims — confirm
// against the chunk before replacing them with native syntax.

// src/constants.ts
const lineDelimiter = "\n";
const punctuationDelimiters = [".", "?", "!"];
const spaceDelimiter = " ";
// Delimiters tried in order when looking for a place to cut a chunk: newline, ". " / "? " / "! ", then a plain space.
const chunkDelimiters = [lineDelimiter, ...punctuationDelimiters.map((delimiter) => delimiter + spaceDelimiter), spaceDelimiter];
const defaults = { pitch: 1, rate: 1, volume: 1, lang: "", voice: "" };
const desktopChunkSize = 1e3;
// Matches a `[[key=value]]` directive plus one optional trailing space.
const directiveRegex = /\[\[(\w+)=([^\]=]+)\]\] ?/;
const directiveRegexGlobal = new RegExp(directiveRegex.source, "g");
const highlightedTextIdSuffix = "-highlighted-text";
const idPrefix = "rtts-";
const iosRegex = /iPhone|iPad|iPod/i;
const lineSplitRegex = /(.*?)(\n)(.*)/;
const minChunkSize = 50;
const mobileChunkSize = 250;
const mobileRegex = /Android|webOS|BlackBerry|IEMobile|Opera Mini/i;
const nonWhitespaceRegex = /\S/;
const symbolMapping = { "<": "lessthan", ">": "greaterthan" };
// Matches either a mapped symbol, or an `&name;` style entity (group 1 = everything before the `;`).
// The backslash must be doubled: inside a template literal a bare `\s` is cooked to the letter "s",
// which made the class exclude "s" instead of whitespace.
const sanitizeRegex = new RegExp(`[${Object.keys(symbolMapping).join("")}]|(&[^\\s;]+);`, "g");
const specialSymbol = "\xA0";
const sentenceDelimiters = [lineDelimiter, ...punctuationDelimiters];
const sentenceSplitRegex = /(.*?)(\n|[.!?]\s)(.*)/;
const trailingSpacesRegex = /[ \t]+$/;
const wordBoundarySeparator = "\u200B";

// src/modules/dom.ts

/** Builds a class string: `id`, followed by a space and `props.className` when one is present. */
const composeClass = (id, props) => `${id}${props?.className ? " " + props.className : ""}`;

/** Returns a copy of `props` whose `className` is the composed class for `id`. */
const composeProps = (id, props) => __spreadProps(__spreadValues({}, props), { className: composeClass(id, props) });

/**
 * Creates a DOM element and copies `props` onto it as element properties.
 * Nullish values are skipped; an object-valued `style` is merged into `element.style`.
 * @param {string} tag
 * @param {object} [props]
 * @returns {HTMLElement}
 */
function createElementWithProps(tag, props = {}) {
  const element = document.createElement(tag);
  Object.keys(props).forEach((key) => {
    const value = props[key];
    if (value == null) return;
    if (key === "style" && typeof value === "object") Object.assign(element.style, value);
    else element[key] = value;
  });
  return element;
}

/** Hides `element`, remembering its display value in `data-original-display` the first time. */
function hideElement(element) {
  if (!element.dataset.originalDisplay) element.dataset.originalDisplay = element.style.display || getComputedStyle(element).display || "block";
  Object.assign(element.style, { display: "none" });
}

/** Restores the display value saved by `hideElement`; does nothing if none was saved. */
function showElement(element) {
  const display = element.dataset.originalDisplay;
  if (display) Object.assign(element.style, { display });
}

// src/modules/state.ts

// Module-wide record of why the current utterance was (or will be) stopped: "manual", "auto" or "change".
const state = { stopReason: "manual" };
const setState = (newState) => Object.assign(state, newState);

// src/modules/utils.ts

/** Records `stopReason` and cancels speech synthesis. No-op when `window` is undefined. */
function cancel(stopReason = "manual") {
  if (typeof window === "undefined") return;
  setState({ stopReason });
  window.speechSynthesis?.cancel();
}

/**
 * Splits `text` into consecutive slices of at most `size` characters, preferring to cut just after
 * the last delimiter (see `chunkDelimiters`) found at or before the size limit.
 * Concatenating the result yields `text` again.
 * @param {string} text
 * @param {number} size must be positive, otherwise the loop cannot advance
 * @returns {string[]}
 */
function chunkBySizeWithDelimiters(text, size) {
  const length = text.length;
  const result = [];
  let startIndex = 0;
  while (startIndex < length) {
    let endIndex = Math.min(startIndex + size, length);
    if (endIndex < length && text[endIndex] !== lineDelimiter) {
      for (const delimiter of chunkDelimiters) {
        // Index of the delimiter's last character; the cut happens right before it.
        const delimiterIndex = text.lastIndexOf(delimiter, endIndex) + delimiter.length - 1;
        if (delimiterIndex > startIndex) {
          endIndex = delimiterIndex;
          break;
        }
      }
    }
    result.push(text.slice(startIndex, endIndex));
    startIndex = endIndex;
  }
  return result;
}

/** Integer percentage of `current` out of `total`; returns `total` itself when it is falsy (e.g. 0). */
const getProgress = (current, total) => total && Math.floor(current / total * 100);

/**
 * Reports whether the user agent looks mobile. Uses `navigator.userAgentData.mobile` when available,
 * otherwise falls back to user-agent sniffing; iOS devices count only when `iOS` is true.
 */
function isMobile(iOS = true) {
  let result = navigator.userAgentData?.mobile;
  result ??= mobileRegex.test(navigator.userAgent) || (iOS && iosRegex.test(navigator.userAgent));
  return result;
}

/** Converts a directive value string to a boolean or number when it looks like one; otherwise returns it unchanged. */
function parse(value) {
  if (value === "true") return true;
  if (value === "false") return false;
  const number = +value;
  if (!isNaN(number) && value !== "") return number;
  return value;
}

/**
 * Rewrites characters matched by `sanitizeRegex`: `<` / `>` become " lessthan" / " greaterthan" followed
 * by `specialSymbol`, and the `;` closing an `&name;` entity becomes `)`.
 * NOTE(review): presumably this stops the speech engine from treating the text as markup — confirm.
 */
const sanitize = (text) => text.replace(sanitizeRegex, (match, group) => group ? group + ")" : ` ${symbolMapping[match]}${specialSymbol}`);

/**
 * Decides whether a word boundary at `charIndex` begins a new highlight unit for `highlightMode`.
 * Always true in "word" mode or at index 0; in "sentence" / "line" mode it inspects the two
 * characters preceding `charIndex` (after collapsing trailing spaces/tabs to one space).
 */
function shouldHighlightNextPart(highlightMode, utterance, charIndex) {
  if (highlightMode === "word" || !charIndex) return true;
  const text = utterance.text.slice(0, charIndex).replace(trailingSpacesRegex, spaceDelimiter).slice(-2);
  if (highlightMode === "sentence" && (text[1] === lineDelimiter || (sentenceDelimiters.includes(text[0]) && text[1] === spaceDelimiter))) return true;
  if (highlightMode === "line" && (text[1] === lineDelimiter || (text[0] === lineDelimiter && text[1] === spaceDelimiter))) return true;
  return false;
}

/**
 * Splits the string `node` into `[before, highlighted, after]` for the current speaking word.
 * The start offset is the last "-"-separated segment of `speakingWord.index`.
 * @param {string} highlightMode "word" | "sentence" | "line" | "paragraph"
 * @param {string} node
 * @param {{ index: string, length: number }} speakingWord
 * @returns {[string, string, string]}
 */
function splitNode(highlightMode, node, speakingWord) {
  if (highlightMode === "paragraph") return ["", node, ""];
  const { index, length } = speakingWord;
  const beforeIndex = +index.split("-").at(-1);
  const before = node.slice(0, beforeIndex);
  if (highlightMode === "word") return [before, node.slice(beforeIndex, beforeIndex + length), node.slice(beforeIndex + length)];
  const rest = node.slice(beforeIndex);
  const match = rest.match(highlightMode === "sentence" ? sentenceSplitRegex : lineSplitRegex);
  if (!match) return [before, rest, ""];
  const sentence = match[1] + match[2].trimEnd();
  return [before, sentence, rest.slice(sentence.length)];
}

/**
 * Breaks `text` into speakable chunks. Word-boundary separators are dropped, directives (when
 * enabled) become chunks of their own, and the text in between is cut by `chunkBySizeWithDelimiters`.
 * @param {string} text
 * @param {number} [size] clamped to at least `minChunkSize`; when falsy a device-dependent default is used
 * @param {boolean} [enableDirectives]
 * @returns {string[]}
 */
function textToChunks(text, size, enableDirectives) {
  size = size ? Math.max(size, minChunkSize) : isMobile() ? mobileChunkSize : desktopChunkSize;
  const regex = new RegExp(`${enableDirectives ? directiveRegex.source + "|" : ""}${wordBoundarySeparator}`, "g");
  const chunks = [];
  let currentIndex = 0;
  let match;
  while ((match = regex.exec(text)) !== null) {
    const directiveIndex = match.index;
    if (directiveIndex > currentIndex) {
      const preDirectiveText = text.slice(currentIndex, directiveIndex);
      chunks.push(...chunkBySizeWithDelimiters(preDirectiveText, size));
    }
    if (match[0] !== wordBoundarySeparator) chunks.push(match[0]);
    currentIndex = regex.lastIndex;
  }
  if (currentIndex < text.length) {
    const remainingText = text.slice(currentIndex);
    chunks.push(...chunkBySizeWithDelimiters(remainingText, size));
  }
  return chunks;
}

// src/modules/queue.ts

// Shared utterance queue (items: { text, utterance, setSpeechStatus }) and its change listeners.
const queue = [];
const queueListeners = [];

/** Appends `item` to the queue and notifies listeners plus the optional `callback`. */
function addToQueue(item, callback) {
  queue.push(item);
  emit(callback);
}

/**
 * Empties the queue, marking every item from position `start` onward as "stopped".
 * @param {boolean} [cancelSpeech] also cancel the current speech
 * @param {number} [start] first queue position whose status is set to "stopped"
 * @param {boolean} [emitEvent] notify queue listeners afterwards
 */
function clearQueue(cancelSpeech = false, start = 0, emitEvent = false) {
  if (cancelSpeech) cancel();
  queue.slice(start).forEach(({ setSpeechStatus }) => setSpeechStatus("stopped"));
  queue.length = 0;
  if (emitEvent) emit();
}

const clearQueueHook = () => clearQueue(true, 1, true);
const clearQueueUnload = () => clearQueue(true, 1);

/** Removes the queue item at `index`; index 0 is handled by cancelling the current speech. */
function dequeue(index = 0) {
  if (index === 0) cancel();
  else removeFromQueue(index);
}

/** Sends a plain snapshot of the queue to every listener and to the optional `callback`. */
function emit(callback) {
  const utteranceQueue = queue.map(({ text, utterance: { pitch, rate, volume, lang, voice } }) => ({ text, pitch, rate, volume, lang, voice }));
  queueListeners.forEach((listener) => listener(utteranceQueue));
  callback?.(utteranceQueue);
}

/**
 * Removes an item from the queue, identified either by position or by its utterance object.
 * Removing the head cancels speech; removing any other item marks it "stopped".
 * @param {number | SpeechSynthesisUtterance} utterance
 * @param {Function} [callback] receives the queue snapshot after removal
 */
function removeFromQueue(utterance, callback) {
  const index = typeof utterance === "number" ? utterance : queue.findIndex((item2) => item2.utterance === utterance);
  if (index === -1) return;
  const [item] = queue.splice(index, 1);
  if (item) {
    if (index === 0) cancel();
    else item.setSpeechStatus("stopped");
    emit(callback);
  }
}

/** Speaks the utterance at the head of the queue, if any. */
function speakFromQueue() {
  const item = queue[0];
  if (item) window.speechSynthesis.speak(item.utterance);
}

/** Registers a queue listener and returns a function that unregisters it. */
function subscribe(callback) {
  queueListeners.push(callback);
  return () => {
    const index = queueListeners.indexOf(callback);
    if (index !== -1) queueListeners.splice(index, 1);
  };
}

// React node helpers

/**
 * Maps a flat character offset into the nested `words` structure (a string or nested arrays of strings).
 * @returns {string} a "-"-joined path of child positions ending with the offset inside the matching
 *   string, or "" when `index` lies beyond the text
 */
function findCharIndex(words, index) {
  let currentIndex = 0;
  function recursiveSearch(currentWords, parentIndex = "") {
    if (typeof currentWords === "string") {
      const elementIndex = index - currentIndex;
      return (currentIndex += currentWords.length) > index ? getIndex(parentIndex, elementIndex) : "";
    }
    for (let i = 0; i < currentWords.length; i++) {
      const result = recursiveSearch(currentWords[i], i);
      if (result) return getIndex(parentIndex, result);
    }
    return "";
  }
  return recursiveSearch(words);
}

/** Joins a parent path and a child index with "-"; an empty parent path yields just the index. */
const getIndex = (parentIndex, index) => `${parentIndex === "" ? "" : parentIndex + "-"}${index}`;

/**
 * Clones a React node tree, giving every non-Fragment element a class of `id` + its path index
 * (composed with any existing `className`) and a key that defaults to that index.
 */
function indexText(node, id, index = "") {
  if (typeof node == "string" || typeof node == "number") return node;
  if (Array.isArray(node)) return node.map((child, i) => indexText(child, id, getIndex(index, i)));
  if (isValidElement(node)) {
    const props = { key: node.key ?? index, children: indexText(normalizeChildren(node.props.children), id, index) };
    if (node.type !== Fragment) props.className = composeClass(`${id}${index}`, node.props);
    return cloneElement(node, props);
  }
  return node;
}

/** True when path `parentIndex` is a segment-wise prefix of path `index`; the empty path is a prefix of any defined index. */
function isParent(parentIndex, index) {
  if (!index?.startsWith(parentIndex)) return false;
  if (parentIndex) {
    // startsWith alone would accept "1" as a parent of "12-0", so compare whole segments.
    const indexParts = index.split("-");
    const parentIndexParts = parentIndex.split("-");
    for (let i = 0; i < parentIndexParts.length; i++) {
      if (indexParts[i] !== parentIndexParts[i]) return false;
    }
  }
  return true;
}

function isSetStateFunction(v) {
  return typeof v === "function";
}

/** Serialises a React node tree (element type, non-children props, children) into a string key. */
function nodeToKey(node) {
  if (typeof node === "string") return node;
  if (typeof node === "number") return String(node);
  if (Array.isArray(node)) return node.map(nodeToKey).join("");
  if (isValidElement(node)) {
    const nodeType = typeof node.type === "string" ? node.type : "Component";
    const _a = node.props, { children } = _a, props = __objRest(_a, ["children"]);
    const propsKey = JSON.stringify(props);
    const childrenKey = nodeToKey(children);
    return `${nodeType}(${propsKey})[${childrenKey}]`;
  }
  return "";
}

/** Reduces a React node tree to nested arrays of strings, mirroring the (normalised) child structure. */
function nodeToWords(node) {
  if (typeof node === "string") return node;
  if (typeof node === "number") return String(node);
  if (Array.isArray(node)) return node.map(nodeToWords);
  if (isValidElement(node)) return nodeToWords(normalizeChildren(node.props.children));
  return "";
}

/**
 * Normalises children: inside an array, numbers and non-blank strings are wrapped in a keyed <span>;
 * a single element is wrapped in an array; anything else is returned unchanged.
 */
function normalizeChildren(node) {
  if (Array.isArray(node)) {
    return node.map((element, i) => {
      if (typeof element === "number" || (typeof element === "string" && nonWhitespaceRegex.test(element))) return /* @__PURE__ */ React2.createElement("span", { key: i }, element);
      return element;
    });
  }
  if (isValidElement(node)) return [node];
  return node;
}

/** Returns the path `index` without its last "-"-separated segment ("" when there is none). */
function parent(index) {
  if (!index) return "";
  const lastIndex = index.lastIndexOf("-");
  return lastIndex === -1 ? "" : index.slice(0, lastIndex);
}

/** Removes `[[key=value]]` directives from every string in a node tree; unsupported node kinds become null. */
function stripDirectives(node) {
  if (typeof node === "string") return node.replace(directiveRegexGlobal, "");
  if (typeof node === "number") return node;
  if (Array.isArray(node)) return node.map(stripDirectives);
  if (isValidElement(node)) return cloneElement(node, { children: stripDirectives(node.props.children) });
  return null;
}

/** Flattens a node tree to a single string, joining array members with `wordBoundarySeparator`. */
function toText(node) {
  if (typeof node === "string") return node;
  if (typeof node === "number") return String(node);
  if (Array.isArray(node)) return node.map(toText).join(wordBoundarySeparator);
  if (isValidElement(node)) return toText(node.props.children);
  return "";
}

// src/hooks.tsx

/**
 * Returns `value` after applying an update policy.
 * @param {*} value
 * @param {string} mode "immediate" | "debounce" | "throttle"; any other mode never updates the value
 * @param {number} delay milliseconds; a non-positive delay behaves like "immediate"
 */
function useStableValue(value, mode, delay) {
  const [stableValue, setStableValue] = useState(value);
  const lastUpdated = useRef(0);
  useEffect(() => {
    let timeout = null;
    if (mode === "immediate" || delay <= 0) timeout = setTimeout(() => setStableValue(value), 0);
    else if (mode === "debounce") timeout = setTimeout(() => setStableValue(value), delay);
    else if (mode === "throttle") {
      const now = Date.now();
      const elapsed = now - lastUpdated.current;
      timeout = setTimeout(
        () => {
          setStableValue(value);
          lastUpdated.current = Date.now();
        },
        Math.max(0, delay - elapsed)
      );
    }
    return () => {
      if (timeout) clearTimeout(timeout);
    };
  }, [value, mode, delay]);
  return stableValue;
}

/** Hook exposing the shared utterance queue snapshot together with `dequeue` and `clearQueue` actions. */
function useQueue() {
  const [queue2, setQueue] = useState([]);
  useEffect(() => subscribe(setQueue), []);
  return { queue: queue2, dequeue, clearQueue: clearQueueHook };
}

/**
 * Imperative variant of `useSpeech`: `speak(text, options)` stores the props, and speech starts
 * from an effect once they have been applied.
 */
function useSpeak(options) {
  const [speechProps, setSpeechProps] = useState({ text: "" });
  const _a = useSpeech(__spreadProps(__spreadValues(__spreadValues({}, speechProps), options), { autoPlay: false })), { start } = _a, speechInterface = __objRest(_a, ["start"]);
  const speak = useCallback((text, options2 = {}) => setSpeechProps(__spreadValues({ text }, options2)), []);
  useEffect(() => {
    if (speechProps.text) start();
  }, [speechProps]);
  return __spreadValues({ speak, start }, speechInterface);
}

/** Public speech hook: `useSpeechInternal` minus its internal-only fields. */
function useSpeech(speechProps) {
  return __objRest(useSpeechInternal(speechProps), ["uniqueId", "normalizedText", "reactContent"]);
}

/**
 * Core speech hook: prepares the text (directive stripping, sanitising, chunking, indexing),
 * drives a single SpeechSynthesisUtterance through the shared queue, and tracks highlight state.
 * @returns {{ uniqueId, normalizedText, reactContent, Text, speechStatus, isInQueue, start, pause, stop }}
 */
function useSpeechInternal({
  id,
  text,
  pitch = defaults.pitch,
  rate = defaults.rate,
  volume = defaults.volume,
  lang = defaults.lang,
  voiceURI = defaults.voice,
  autoPlay = false,
  preserveUtteranceQueue = false,
  highlightText = false,
  showOnlyHighlightedText = false,
  highlightMode = "word",
  highlightProps,
  highlightContainerProps,
  enableDirectives = false,
  updateMode = "immediate",
  updateDelay = 0,
  maxChunkSize,
  onError = console.error,
  onStart,
  onResume,
  onPause,
  onStop,
  onBoundary,
  onQueueChange
}) {
  const [speechStatus, speechStatusRef, setSpeechStatus] = useStateRef("stopped");
  const [speakingWord, speakingWordRef, setSpeakingWord] = useStateRef(null);
  const { utteranceRef, updateProps } = useSpeechSynthesisUtterance();
  const highlightRef = useRef(false);
  // Tracks directive-driven state: pending delay (ms), the event that caused it, and an optional abortDelay().
  const directiveRef = useRef({ event: null, delay: 0 });
  const uniqueId = useMemo(() => `${idPrefix}${id ?? crypto.randomUUID()}`, [id]);
  const key = useMemo(() => nodeToKey(text), [text]);
  const stableKey = useStableValue(key, updateMode, updateDelay);
  const stringifiedVoices = useMemo(() => voiceURI.toString(), [voiceURI]);
  // Deliberately keyed on stableKey (not `text`) so updates follow the updateMode / updateDelay policy.
  const normalizedText = useMemo(() => isValidElement(text) ? [text] : text, [stableKey]);
  const { indexedText, sanitizedText, speechText, words } = useMemo(() => {
    const strippedText = enableDirectives ? stripDirectives(normalizedText) : normalizedText;
    const words2 = nodeToWords(strippedText);
    const sanitizedText2 = `${spaceDelimiter}${sanitize(toText(enableDirectives ? normalizedText : words2))}`;
    const speechText2 = stripDirectives(sanitizedText2).trimStart();
    return { indexedText: indexText(strippedText, uniqueId), sanitizedText: sanitizedText2, speechText: speechText2, words: words2 };
  }, [enableDirectives, normalizedText]);
  const chunks = useMemo(() => textToChunks(sanitizedText, maxChunkSize, enableDirectives), [enableDirectives, maxChunkSize, sanitizedText]);
  const reactContent = useMemo(() => showOnlyHighlightedText ? highlightedText(indexedText) : indexedText, [speakingWord, showOnlyHighlightedText, highlightMode, indexedText]);
  const Text = useCallback(
    forwardRef((props, ref) => /* @__PURE__ */ React2.createElement("div", __spreadValues({ ref }, composeProps(uniqueId, props)), reactContent)),
    [reactContent]
  );

  /** Aborts any pending directive delay and resets the directive state to `{ event, delay: 0 }`. */
  function reset(event = null) {
    directiveRef.current.abortDelay?.();
    directiveRef.current = { event, delay: 0 };
  }

  function resumeEventHandler() {
    setSpeechStatus("started");
    onResume?.();
  }

  function pauseEventHandler() {
    setSpeechStatus("paused");
    onPause?.();
  }

  /** Starts speaking, resumes when paused, or does nothing when already queued. */
  function start() {
    const synth = window.speechSynthesis;
    if (!synth) return onError(new Error("Browser not supported! Try some other browser."));
    if (speechStatusRef.current === "paused") {
      if (directiveRef.current.event === "pause") speakFromQueue();
      synth.resume();
      return resumeEventHandler();
    }
    if (speechStatusRef.current === "queued") return;
    let currentChunk = 0;
    let currentText = chunks[currentChunk] || "";
    // sanitizedText carries one leading space that speechText does not, hence the negative start.
    let processedTextLength = -spaceDelimiter.length;
    const utterance = utteranceRef.current;
    utterance.text = currentText.trimStart();
    let offset = processedTextLength + currentText.length - utterance.text.length;
    let specialSymbolOffset = 0;
    updateProps({ pitch, rate, volume, lang, voiceURI });

    /**
     * Consumes directive chunks starting at `currentChunk`, applying each one.
     * @returns {boolean} true when `currentText` is a speakable chunk, false when the chunks ran out
     */
    function handleDirectives() {
      let skip = false;
      while (currentChunk < chunks.length) {
        const match = directiveRegex.exec(currentText);
        if (!match) {
          if (!skip) return true;
          processedTextLength += currentText.length;
        } else {
          const key2 = match[1];
          const value = parse(match[2]);
          switch (key2) {
            case "delay":
              directiveRef.current.delay += value;
              break;
            case "pitch":
            case "rate":
            case "volume":
            case "lang":
            case "voice": {
              // updateProps only understands `voiceURI`; passing `{ voice }` was silently ignored,
              // so the `voice` directive is mapped onto it here.
              const propName = key2 === "voice" ? "voiceURI" : key2;
              updateProps({ [propName]: value === "default" ? defaults[key2] : value });
              break;
            }
            case "skip":
              if (typeof value === "boolean") skip = value;
              break;
          }
        }
        currentText = chunks[++currentChunk];
      }
      return false;
    }

    function startEventHandler() {
      window.addEventListener("beforeunload", clearQueueUnload);
      setState({ stopReason: "auto" });
      highlightRef.current = true;
      // A chunk that starts after a directive/pause is a continuation, so onStart is not fired again.
      if (directiveRef.current.event || directiveRef.current.delay) reset();
      else onStart?.();
      onBoundary?.({ progress: getProgress(offset, speechText.length) });
    }

    /** Handles both `end` and `error` events: advances to the next chunk, restarts after a prop change, or finishes. */
    function stopEventHandler() {
      return __async(this, null, function* () {
        if (state.stopReason === "auto" && currentChunk < chunks.length - 1) {
          processedTextLength += currentText.length;
          currentText = chunks[++currentChunk];
          const continueSpeech = !enableDirectives || handleDirectives();
          if (continueSpeech) {
            utterance.text = currentText.trimStart();
            offset = processedTextLength + currentText.length - utterance.text.length;
            if (speechStatusRef.current === "paused") return reset("pause");
            const { delay } = directiveRef.current;
            directiveRef.current.event = "change";
            if (!delay) return speakFromQueue();
            const timeout = setTimeout(speakFromQueue, delay);
            directiveRef.current.abortDelay = () => clearTimeout(timeout);
            return;
          }
        }
        if (state.stopReason === "change") {
          // Props changed mid-speech: re-speak the remainder of this chunk from the current word.
          if (speakingWordRef.current) {
            const currentLength = utterance.text.length;
            utterance.text = utterance.text.slice(speakingWordRef.current.charIndex).trimStart();
            offset += currentLength - utterance.text.length;
            setSpeakingWord(null);
          }
          return speakFromQueue();
        }
        if (state.stopReason === "auto") onBoundary?.({ progress: 100 });
        if (synth.paused) cancel();
        window.removeEventListener("beforeunload", clearQueueUnload);
        setSpeechStatus("stopped");
        setSpeakingWord(null);
        utterance.onstart = null;
        utterance.onend = null;
        utterance.onerror = null;
        utterance.onboundary = null;
        removeFromQueue(utterance, onQueueChange);
        speakFromQueue();
        onStop?.();
      });
    }

    function boundaryEventHandler(event) {
      const { charIndex, charLength, name } = event;
      if (name === "word") {
        // 1 when this word is one of the spoken replacements emitted by sanitize() (followed by specialSymbol).
        const isSpecialSymbol = +(utterance.text[charIndex + charLength] === specialSymbol);
        const index = findCharIndex(words, offset + specialSymbolOffset + charIndex - isSpecialSymbol);
        if (shouldHighlightNextPart(highlightMode, utterance, charIndex) || parent(index) !== parent(speakingWordRef.current?.index)) {
          setSpeakingWord({ index, charIndex: isSpecialSymbol ? charIndex + charLength + 1 : charIndex, length: isSpecialSymbol || charLength });
        }
        if (isSpecialSymbol) specialSymbolOffset -= charLength + 1;
      }
      onBoundary?.({ progress: getProgress(offset + charIndex + charLength, speechText.length) });
    }

    utterance.onstart = startEventHandler;
    utterance.onend = stopEventHandler;
    utterance.onerror = stopEventHandler;
    utterance.onboundary = boundaryEventHandler;
    if (!preserveUtteranceQueue) clearQueue();
    addToQueue({ text: speechText, utterance, setSpeechStatus }, onQueueChange);
    setSpeechStatus("started");
    if (!synth.speaking) return speakFromQueue();
    if (preserveUtteranceQueue && speechStatus !== "started") return setSpeechStatus("queued");
    cancel();
  }

  /** Pauses speech; on non-iOS mobile user agents, or while queued, it stops instead. */
  function pause() {
    if (isMobile(false) || speechStatusRef.current === "queued") return stop();
    if (speechStatusRef.current === "started") {
      if (directiveRef.current.delay) reset("pause");
      else window.speechSynthesis?.pause();
      pauseEventHandler();
    }
  }

  /** Stops speech, or removes this utterance from the queue when it has not started yet. */
  function stop({ status = speechStatusRef.current, stopReason } = {}) {
    if (status === "stopped") return;
    if (status === "queued") {
      removeFromQueue(utteranceRef.current, onQueueChange);
      return setSpeechStatus("stopped");
    }
    if (directiveRef.current.delay || directiveRef.current.event === "pause") {
      reset();
      speakFromQueue();
    }
    cancel(stopReason);
  }

  /** Returns a copy of the tree containing only the currently highlighted text (undefined off the speaking path). */
  function highlightedText(node, index = "") {
    if (!highlightText || !isParent(index, speakingWord?.index)) return;
    switch (typeof node) {
      case "number":
        node = String(node);
      // falls through
      case "string": {
        const highlighted = splitNode(highlightMode, node, speakingWord)[1];
        return /* @__PURE__ */ React2.createElement("span", __spreadValues({}, highlightContainerProps), /* @__PURE__ */ React2.createElement("mark", __spreadValues({}, highlightProps), highlighted));
      }
    }
    if (Array.isArray(node)) return node.map((child, i) => highlightedText(child, getIndex(index, i)));
    if (isValidElement(node)) return cloneElement(node, { children: highlightedText(node.props.children, index) });
  }

  useLayoutEffect(() => {
    highlightRef.current = false;
    if (autoPlay) start();
    return () => stop({ status: speechStatusRef.current });
  }, [autoPlay, enableDirectives, normalizedText]);

  // Highlights the speaking word directly in the DOM and restores the original text on cleanup.
  useLayoutEffect(() => {
    if (!highlightText || !highlightRef.current || showOnlyHighlightedText || !speakingWord) return;
    const parts = speakingWord.index.split("-");
    const parentIndex = parts.slice(0, -1).join("-");
    const elements = Array.from(document.getElementsByClassName(`${uniqueId}${parentIndex}`));
    const elementSnapshots = elements.map((element) => {
      const originalTextContent = element.textContent;
      const [before, highlighted, after] = splitNode(highlightMode, element.textContent, speakingWord);
      const container = createElementWithProps("span", highlightContainerProps);
      const mark = createElementWithProps("mark", highlightProps);
      mark.textContent = highlighted;
      if (before) container.appendChild(document.createTextNode(before));
      container.appendChild(mark);
      if (after) container.appendChild(document.createTextNode(after));
      element.textContent = "";
      element.appendChild(container);
      return [element, originalTextContent];
    });
    return () => {
      if (highlightRef.current) {
        elementSnapshots.forEach(([element, originalTextContent]) => {
          element.textContent = originalTextContent;
        });
      }
    };
  }, [speakingWord, highlightText, showOnlyHighlightedText, highlightMode]);

  // While speaking, apply changed voice props after 500 ms and restart from the current word.
  useEffect(() => {
    if (speechStatusRef.current !== "started") return;
    const timeout = setTimeout(() => {
      updateProps({ pitch, rate, volume, lang, voiceURI });
      if (directiveRef.current.delay) return;
      stop({ stopReason: "change" });
      emit(onQueueChange);
    }, 500);
    return () => clearTimeout(timeout);
  }, [pitch, rate, volume, lang, stringifiedVoices]);

  return {
    uniqueId,
    normalizedText,
    reactContent,
    Text,
    speechStatus,
    isInQueue: speechStatus === "started" || speechStatus === "queued",
    start,
    pause,
    stop: () => stop()
  };
}

/**
 * Holds one SpeechSynthesisUtterance per hook instance (null when speech synthesis is unavailable)
 * and returns `updateProps` to mutate it.
 */
function useSpeechSynthesisUtterance() {
  const utteranceRef = useRef(typeof window === "undefined" || !window.speechSynthesis ? null : new SpeechSynthesisUtterance());
  const { voices } = useVoices();

  /**
   * Applies the defined props to the utterance. `voiceURI` may be a string or an array of
   * candidates; the first one matching an available voice wins. A falsy `voiceURI` clears the voice.
   */
  function updateProps({ pitch, rate, volume, lang, voiceURI }) {
    const utterance = utteranceRef.current;
    if (!utterance) return;
    if (pitch !== void 0) utterance.pitch = pitch;
    if (rate !== void 0) utterance.rate = rate;
    if (volume !== void 0) utterance.volume = volume;
    if (lang !== void 0) utterance.lang = lang;
    if (voiceURI === void 0) return;
    if (!voiceURI) {
      utterance.voice = null;
      return;
    }
    const candidates = Array.isArray(voiceURI) ? voiceURI : [voiceURI];
    for (const uri of candidates) {
      const voice = voices.find((voice2) => voice2.voiceURI === uri);
      if (voice) {
        utterance.voice = voice;
        // Without an explicit language, follow the language of the chosen voice.
        if (!lang) utterance.lang = voice.lang;
        return;
      }
    }
  }

  return { utteranceRef, updateProps };
}

/** `useState` that also mirrors the latest value into a ref; returns `[state, ref, setState]`. */
function useStateRef(init) {
  const [state2, setState2] = useState(init);
  const ref = useRef(init);
  function setStateRef(value) {
    setState2((prev) => {
      const next = isSetStateFunction(value) ? value(prev) : value;
      ref.current = next;
      return next;
    });
  }
  return [state2, ref, setStateRef];
}

/** Hook returning the available speech synthesis voices and the distinct languages among them. */
function useVoices() {
  const [languages, setLanguages] = useState([]);
  const [voices, setVoices] = useState([]);
  function setData(voices2) {
    setLanguages([...new Set(voices2.map(({ lang }) => lang))]);
    setVoices(voices2);
  }
  useEffect(() => {
    const synth = window.speechSynthesis;
    if (!synth) return;
    const voices2 = synth.getVoices();
    if (voices2.length) setData(voices2);
    else {
      // The voice list was empty on this call; read it again when "voiceschanged" fires.
      const onVoicesChanged = () => setData(synth.getVoices());
      synth.addEventListener("voiceschanged", onVoicesChanged);
      return () => synth.removeEventListener("voiceschanged", onVoicesChanged);
    }
  }, []);
  return { languages, voices };
}

export { composeProps, hideElement, highlightedTextIdSuffix, idPrefix, showElement, useQueue, useSpeak, useSpeech, useSpeechInternal, useVoices };