// react-native-executorch
// An easy way to run AI models in React Native with ExecuTorch
import { RnExecutorchErrorCode } from '../../errors/ErrorCodes';
import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils';
import { ResourceFetcher } from '../../utils/ResourceFetcher';
import { Logger } from '../../common/Logger';
/**
 * Module for Text to Speech (TTS) functionalities.
 * @category Typescript API
 */
export class TextToSpeechModule {
  // True while a streaming synthesis session is in progress.
  isStreaming = false;

  /**
   * @param nativeModule - Handle to the loaded native TTS module
   * (normally obtained via {@link TextToSpeechModule.fromModelName}).
   */
  constructor(nativeModule) {
    this.nativeModule = nativeModule;
  }

  /**
   * Creates a Text to Speech instance.
   * @param config - Configuration object containing `model` and `voice`.
   * Pass one of the built-in constants (e.g. `{ model: KOKORO_MEDIUM, voice: KOKORO_VOICE_AF_HEART }`), or use require() to pass them.
   * @param onDownloadProgress - Optional callback to monitor download progress, receiving a value between 0 and 1.
   * @returns A Promise resolving to a `TextToSpeechModule` instance.
   * @example
   * ```ts
   * import { TextToSpeechModule, KOKORO_MEDIUM, KOKORO_VOICE_AF_HEART } from 'react-native-executorch';
   * const tts = await TextToSpeechModule.fromModelName(
   *   { model: KOKORO_MEDIUM, voice: KOKORO_VOICE_AF_HEART },
   * );
   * ```
   */
  static async fromModelName(config, onDownloadProgress = () => {}) {
    try {
      const nativeModule = await TextToSpeechModule.loadKokoro(config.model, config.voice, onDownloadProgress);
      return new TextToSpeechModule(nativeModule);
    } catch (error) {
      Logger.error('Load failed:', error);
      // Normalize arbitrary thrown values into RnExecutorchError before rethrowing.
      throw parseUnknownError(error);
    }
  }

  /**
   * Downloads (when not cached) all Kokoro resources and initializes the native module.
   * @param model - Model config providing `durationPredictorSource` and `synthesizerSource`.
   * @param voice - Voice config providing `lang`, `voiceSource`, plus
   * `extra.taggerSource` / `extra.lexiconSource` for the phonemizer.
   * @param onDownloadProgressCallback - Receives overall download progress (0..1).
   * @returns The native TTS module handle.
   * @throws {RnExecutorchError} `InvalidConfig` when required voice fields are missing,
   * `DownloadInterrupted` when any resource could not be fetched.
   */
  static async loadKokoro(model, voice, onDownloadProgressCallback) {
    if (!voice.extra?.taggerSource || !voice.extra?.lexiconSource) {
      throw new RnExecutorchError(RnExecutorchErrorCode.InvalidConfig, 'Kokoro: voice config is missing required extra fields: taggerSource and/or lexiconSource.');
    }
    const paths = await ResourceFetcher.fetch(onDownloadProgressCallback, model.durationPredictorSource, model.synthesizerSource, voice.voiceSource, voice.extra.taggerSource, voice.extra.lexiconSource);
    if (paths === null || paths.length !== 5) {
      throw new RnExecutorchError(RnExecutorchErrorCode.DownloadInterrupted, 'Download interrupted or missing resource.');
    }
    // ResourceFetcher.fetch resolves local paths in argument order.
    const [durationPredictorPath, synthesizerPath, voiceDataPath, taggerPath, lexiconPath] = paths;
    return await global.loadTextToSpeechKokoro(voice.lang, taggerPath, lexiconPath, durationPredictorPath, synthesizerPath, voiceDataPath);
  }

  /**
   * Throws when the native module is not loaded (e.g. after {@link delete}).
   * @param methodName - Caller name, interpolated into the error message.
   */
  ensureLoaded(methodName) {
    if (this.nativeModule == null) throw new RnExecutorchError(RnExecutorchErrorCode.ModuleNotLoaded, `The model is currently not loaded. Please load the model before calling ${methodName}().`);
  }

  /**
   * Synthesizes the provided text into speech.
   * Returns a promise that resolves to the full audio waveform as a `Float32Array`.
   * @param text The input text to be synthesized.
   * @param speed Optional speed multiplier for the speech synthesis (default is 1.0).
   * @returns A promise resolving to the synthesized audio waveform.
   */
  async forward(text, speed = 1.0) {
    this.ensureLoaded('forward');
    return await this.nativeModule.generate(text, speed);
  }

  /**
   * Synthesizes pre-computed phonemes into speech, bypassing the built-in phonemizer.
   * This allows using an external G2P system (e.g. the Python `phonemizer` library,
   * espeak-ng, or any custom phonemizer).
   * @param phonemes The pre-computed IPA phoneme string.
   * @param speed Optional speed multiplier for the speech synthesis (default is 1.0).
   * @returns A promise resolving to the synthesized audio waveform.
   */
  async forwardFromPhonemes(phonemes, speed = 1.0) {
    this.ensureLoaded('forwardFromPhonemes');
    return await this.nativeModule.generateFromPhonemes(phonemes, speed);
  }

  /**
   * Shared pump for both streaming entry points: starts the native stream and
   * yields each produced audio chunk as a `Float32Array`.
   *
   * Terminates when the native stream finishes and the queue is drained — even
   * when the stream produced no chunks at all — and always clears `isStreaming`
   * on exit (natural completion, error, or early consumer break).
   * @param startNative - Invokes the native streaming call, wiring in the
   * per-chunk callback it is given.
   * @yields Float32Array audio chunks in production order.
   */
  async *#pump(startNative) {
    // Chunks produced by the native callback but not yet consumed.
    const queue = [];
    let waiter = null;
    let error;
    let nativeStreamFinished = false;
    this.isStreaming = true;
    // Wake the generator if it is parked awaiting new chunks.
    const wake = () => {
      waiter?.();
      waiter = null;
    };
    // Run the native stream concurrently; the generator below drains the queue.
    (async () => {
      try {
        await startNative((audio) => {
          queue.push(new Float32Array(audio));
          wake();
        });
      } catch (e) {
        error = e;
      } finally {
        nativeStreamFinished = true;
        wake();
      }
    })();
    try {
      while (this.isStreaming) {
        if (queue.length > 0) {
          yield queue.shift();
          continue;
        }
        if (error) throw error;
        // Without this check the generator would await forever when the native
        // stream finishes while the queue is empty.
        if (nativeStreamFinished) return;
        await new Promise((resolve) => (waiter = resolve));
      }
    } finally {
      this.isStreaming = false;
    }
  }

  /**
   * Starts a streaming synthesis session. Yields audio chunks as they are generated.
   * @param input - Input object containing optional speed and stopAutomatically flag.
   * @yields An audio chunk generated during synthesis.
   * @returns An async generator yielding Float32Array audio chunks.
   */
  async *stream({ speed, stopAutomatically }) {
    this.ensureLoaded('stream');
    yield* this.#pump((onAudio) => this.nativeModule.stream(speed, stopAutomatically, onAudio));
  }

  /**
   * Starts a streaming synthesis session from pre-computed phonemes.
   * Bypasses the built-in phonemizer, allowing use of external G2P systems.
   * @param input - Input object containing phonemes and optional speed.
   * @yields An audio chunk generated during synthesis.
   * @returns An async generator yielding Float32Array audio chunks.
   */
  async *streamFromPhonemes({ phonemes, speed }) {
    this.ensureLoaded('streamFromPhonemes');
    yield* this.#pump((onAudio) => this.nativeModule.streamFromPhonemes(phonemes, speed, onAudio));
  }

  /**
   * Inserts new text chunk into the buffer to be processed in streaming mode.
   * @param textChunk - The text fragment to append to the streaming buffer.
   */
  streamInsert(textChunk) {
    this.ensureLoaded('streamInsert');
    this.nativeModule.streamInsert(textChunk);
  }

  /**
   * Stops the streaming process if there is any ongoing.
   * @param instant - If true, stops the streaming as soon as possible. Otherwise
   * allows the module to complete processing for the remains of the buffer.
   */
  streamStop(instant = true) {
    this.ensureLoaded('streamStop');
    this.nativeModule.streamStop(instant);
    if (instant) {
      this.isStreaming = false;
    }
  }

  /**
   * Unloads the model from memory. Safe to call more than once.
   */
  delete() {
    if (this.nativeModule != null) {
      this.nativeModule.unload();
      // Drop the handle so later calls fail fast via ensureLoaded() instead of
      // reaching into an unloaded native module, and so unload() runs only once.
      this.nativeModule = null;
    }
  }
}
//# sourceMappingURL=TextToSpeechModule.js.map