UNPKG

react-native-executorch

Version:

An easy way to run AI models in React Native with ExecuTorch

205 lines (195 loc) 7.13 kB
"use strict"; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; import { ResourceFetcher } from '../../utils/ResourceFetcher'; import { Logger } from '../../common/Logger'; /** * Module for Text to Speech (TTS) functionalities. * @category Typescript API */ export class TextToSpeechModule { isStreaming = false; constructor(nativeModule) { this.nativeModule = nativeModule; } /** * Creates a Text to Speech instance. * @param config - Configuration object containing `model` and `voice`. * Pass one of the built-in constants (e.g. `{ model: KOKORO_MEDIUM, voice: KOKORO_VOICE_AF_HEART }`), or use require() to pass them. * @param onDownloadProgress - Optional callback to monitor download progress, receiving a value between 0 and 1. * @returns A Promise resolving to a `TextToSpeechModule` instance. * @example * ```ts * import { TextToSpeechModule, KOKORO_MEDIUM, KOKORO_VOICE_AF_HEART } from 'react-native-executorch'; * const tts = await TextToSpeechModule.fromModelName( * { model: KOKORO_MEDIUM, voice: KOKORO_VOICE_AF_HEART }, * ); * ``` */ static async fromModelName(config, onDownloadProgress = () => {}) { try { const nativeModule = await TextToSpeechModule.loadKokoro(config.model, config.voice, onDownloadProgress); return new TextToSpeechModule(nativeModule); } catch (error) { Logger.error('Load failed:', error); throw parseUnknownError(error); } } static async loadKokoro(model, voice, onDownloadProgressCallback) { if (!voice.extra || !voice.extra.taggerSource || !voice.extra.lexiconSource) { throw new RnExecutorchError(RnExecutorchErrorCode.InvalidConfig, 'Kokoro: voice config is missing required extra fields: taggerSource and/or lexiconSource.'); } const paths = await ResourceFetcher.fetch(onDownloadProgressCallback, model.durationPredictorSource, model.synthesizerSource, voice.voiceSource, voice.extra.taggerSource, voice.extra.lexiconSource); if (paths === null || paths.length !== 5) { throw new RnExecutorchError(RnExecutorchErrorCode.DownloadInterrupted, 'Download interrupted or missing resource.'); } const modelPaths = paths.slice(0, 2); const voiceDataPath = paths[2]; const phonemizerPaths = paths.slice(3, 5); return await global.loadTextToSpeechKokoro(voice.lang, phonemizerPaths[0], phonemizerPaths[1], modelPaths[0], modelPaths[1], voiceDataPath); } ensureLoaded(methodName) { if (this.nativeModule == null) throw new RnExecutorchError(RnExecutorchErrorCode.ModuleNotLoaded, `The model is currently not loaded. Please load the model before calling ${methodName}().`); } /** * Synthesizes the provided text into speech. * Returns a promise that resolves to the full audio waveform as a `Float32Array`. * @param text The input text to be synthesized. * @param speed Optional speed multiplier for the speech synthesis (default is 1.0). * @returns A promise resolving to the synthesized audio waveform. */ async forward(text, speed = 1.0) { this.ensureLoaded('forward'); return await this.nativeModule.generate(text, speed); } /** * Synthesizes pre-computed phonemes into speech, bypassing the built-in phonemizer. * This allows using an external G2P system (e.g. the Python `phonemizer` library, * espeak-ng, or any custom phonemizer). * @param phonemes The pre-computed IPA phoneme string. * @param speed Optional speed multiplier for the speech synthesis (default is 1.0). * @returns A promise resolving to the synthesized audio waveform. */ async forwardFromPhonemes(phonemes, speed = 1.0) { this.ensureLoaded('forwardFromPhonemes'); return await this.nativeModule.generateFromPhonemes(phonemes, speed); } /** * Starts a streaming synthesis session. Yields audio chunks as they are generated. * @param input - Input object containing text and optional speed. * @yields An audio chunk generated during synthesis. * @returns An async generator yielding Float32Array audio chunks. */ async *stream({ speed, stopAutomatically }) { // Stores computed audio segments const queue = []; let waiter = null; let error; let nativeStreamFinished = false; this.isStreaming = true; const wake = () => { waiter?.(); waiter = null; }; (async () => { try { await this.nativeModule.stream(speed, stopAutomatically, audio => { queue.push(new Float32Array(audio)); wake(); }); nativeStreamFinished = true; wake(); } catch (e) { error = e; nativeStreamFinished = true; wake(); } })(); while (this.isStreaming) { if (queue.length > 0) { yield queue.shift(); if (nativeStreamFinished && queue.length === 0) { return; } continue; } if (error) throw error; await new Promise(r => waiter = r); } } /** * Starts a streaming synthesis session from pre-computed phonemes. * Bypasses the built-in phonemizer, allowing use of external G2P systems. * @param input - Input object containing phonemes and optional speed. * @yields An audio chunk generated during synthesis. * @returns An async generator yielding Float32Array audio chunks. */ async *streamFromPhonemes({ phonemes, speed }) { const queue = []; let waiter = null; let error; let nativeStreamFinished = false; const wake = () => { waiter?.(); waiter = null; }; (async () => { try { await this.nativeModule.streamFromPhonemes(phonemes, speed, audio => { queue.push(new Float32Array(audio)); wake(); }); nativeStreamFinished = true; wake(); } catch (e) { error = e; nativeStreamFinished = true; wake(); } })(); while (this.isStreaming) { if (queue.length > 0) { yield queue.shift(); if (nativeStreamFinished && queue.length === 0) { return; } continue; } if (error) throw error; await new Promise(r => waiter = r); } } /** * Inserts new text chunk into the buffer to be processed in streaming mode. * @param textChunk - The text fragment to append to the streaming buffer. */ streamInsert(textChunk) { this.nativeModule.streamInsert(textChunk); } /** * Stops the streaming process if there is any ongoing. * @param instant - If true, stops the streaming as soon as possible. Otherwise * allows the module to complete processing for the remains of the buffer. */ streamStop(instant = true) { this.nativeModule.streamStop(instant); if (instant) { this.isStreaming = false; } } /** * Unloads the model from memory. */ delete() { if (this.nativeModule !== null) { this.nativeModule.unload(); } } } //# sourceMappingURL=TextToSpeechModule.js.map