js-tts-wrapper
Version:
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
147 lines (146 loc) • 6.67 kB
JavaScript
import { AbstractTTSClient } from "../core/abstract-tts.js";
import { EspeakNodeTTSClient } from "./espeak.js";
/**
 * Report whether the code is running in a browser-like environment.
 * @returns {boolean} true only when both the `window` and `document` globals are defined
 */
function isBrowser() {
    // `typeof` never throws on an undeclared global, so this is safe in Node.js too
    if (typeof window === "undefined") {
        return false;
    }
    return typeof document !== "undefined";
}
// Removed meSpeak interface - no longer used
// Single source for the "browser backend disabled" text so the thrown error and the
// credential report can never drift apart.
const ESPEAK_WASM_BROWSER_DISABLED_MESSAGE = "eSpeak-WASM browser support is currently disabled due to meSpeak compatibility issues. Use EspeakNodeTTSClient for Node.js environments.";
/**
 * eSpeak TTS client exposed under the "eSpeak WASM" name.
 *
 * Outside a browser, synthesis and voice listing are delegated to an
 * EspeakNodeTTSClient created in the constructor.
 * In a browser, synthesis is currently disabled: synthToBytes() throws and the
 * credential checks report the engine as unavailable.
 * For Node.js-only environments, use EspeakNodeTTSClient directly.
 */
export class EspeakBrowserTTSClient extends AbstractTTSClient {
    /**
     * @param credentials Passed to the base class and, outside a browser, to the
     *   delegated EspeakNodeTTSClient. Defaults to an empty object.
     */
    constructor(credentials = {}) {
        super(credentials);
        // Declares the `nodeClient` property up front; it stays undefined in a browser.
        Object.defineProperty(this, "nodeClient", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Set a default voice for eSpeak TTS
        this.voiceId = "en"; // Default English voice
        // Outside a browser, create the client that all real work is delegated to
        if (!isBrowser()) {
            this.nodeClient = new EspeakNodeTTSClient(credentials);
        }
    }
    /**
     * Synthesize text to audio bytes.
     * @param text Text to synthesize
     * @param options Synthesis options, forwarded unchanged to the Node.js client
     * @returns Promise resolving to whatever the delegated client's synthToBytes() returns
     * @throws {Error} When running in a browser or when no Node.js client was created
     */
    async synthToBytes(text, options) {
        // For Node.js environments, delegate to the regular eSpeak client.
        // NOTE(review): only `text` and `options` are forwarded; a voice selected on this
        // wrapper (this.voiceId) is not passed on here - confirm whether the delegate
        // should be kept in sync.
        if (!isBrowser() && this.nodeClient) {
            console.log("eSpeak-WASM: Delegating to Node.js eSpeak client");
            return await this.nodeClient.synthToBytes(text, options);
        }
        // Browser environment - no working backend at the moment
        throw new Error(ESPEAK_WASM_BROWSER_DISABLED_MESSAGE);
    }
    /**
     * Synthesize text to a byte stream (ReadableStream)
     * @param text Text to synthesize
     * @param options Synthesis options
     * @returns Promise resolving to an object containing the audio stream and an empty word boundaries array.
     */
    async synthToBytestream(text, options) {
        const audioBytes = await this.synthToBytes(text, options);
        // "Fake" streaming: the full audio is enqueued as a single chunk, then the stream closes
        const audioStream = new ReadableStream({
            start(controller) {
                controller.enqueue(audioBytes);
                controller.close();
            },
        });
        return { audioStream, wordBoundaries: [] };
    }
    /**
     * Return available voices for eSpeak WASM.
     *
     * Outside a browser the delegated client's voices are returned with "(eSpeak)" in
     * their names replaced by "(eSpeak WASM)". Otherwise a fixed list of common
     * languages is returned.
     * @returns Promise resolving to an array of voice descriptors
     */
    async _getVoices() {
        // For Node.js environments, delegate to the regular eSpeak client
        if (!isBrowser() && this.nodeClient) {
            const nodeVoices = await this.nodeClient._getVoices();
            // Rename them to indicate they're from eSpeak WASM (but actually using Node.js fallback)
            return nodeVoices.map((voice) => ({
                ...voice,
                name: voice.name.replace('(eSpeak)', '(eSpeak WASM)')
            }));
        }
        // Static subset of common languages; nothing is queried at runtime
        const commonVoices = [
            { id: "en", name: "English", language: "English" },
            { id: "en-us", name: "English (US)", language: "English" },
            { id: "en-rp", name: "English (RP)", language: "English" },
            { id: "en-sc", name: "English (Scottish)", language: "English" },
            { id: "es", name: "Spanish", language: "Spanish" },
            { id: "es-la", name: "Spanish (Latin America)", language: "Spanish" },
            { id: "fr", name: "French", language: "French" },
            { id: "de", name: "German", language: "German" },
            { id: "it", name: "Italian", language: "Italian" },
            { id: "pt", name: "Portuguese (Brazil)", language: "Portuguese" },
            { id: "pt-pt", name: "Portuguese (European)", language: "Portuguese" },
            { id: "ru", name: "Russian", language: "Russian" },
            { id: "zh", name: "Chinese (Mandarin)", language: "Chinese" },
            { id: "zh-yue", name: "Chinese (Cantonese)", language: "Chinese" },
            { id: "ja", name: "Japanese", language: "Japanese" },
            { id: "ko", name: "Korean", language: "Korean" },
            { id: "ar", name: "Arabic", language: "Arabic" },
            { id: "hi", name: "Hindi", language: "Hindi" },
            { id: "nl", name: "Dutch", language: "Dutch" },
            { id: "sv", name: "Swedish", language: "Swedish" },
            { id: "da", name: "Danish", language: "Danish" },
            { id: "no", name: "Norwegian", language: "Norwegian" },
            { id: "fi", name: "Finnish", language: "Finnish" },
            { id: "pl", name: "Polish", language: "Polish" },
            { id: "cs", name: "Czech", language: "Czech" },
            { id: "hu", name: "Hungarian", language: "Hungarian" },
            { id: "tr", name: "Turkish", language: "Turkish" },
            { id: "he", name: "Hebrew", language: "Hebrew" },
            { id: "th", name: "Thai", language: "Thai" },
            { id: "vi", name: "Vietnamese", language: "Vietnamese" },
        ];
        const voices = commonVoices.map((voice) => ({
            id: voice.id,
            name: `${voice.name} (eSpeak WASM)`,
            gender: "Unknown", // the static list carries no gender info
            provider: "espeak-ng",
            languageCodes: [
                {
                    bcp47: voice.id.split("-")[0], // Use the base language code
                    iso639_3: "", // Would need mapping
                    display: voice.language,
                },
            ],
        }));
        return voices;
    }
    /**
     * Get the list of required credential types for this engine
     * @returns Array of required credential field names (always empty)
     */
    getRequiredCredentials() {
        return []; // eSpeak doesn't require any credentials
    }
    /**
     * Check whether the engine is usable. eSpeak needs no credentials, so this
     * reports whether synthToBytes() has a backend to delegate to.
     * @returns Promise resolving to true outside a browser when a Node.js client exists, false otherwise
     */
    async checkCredentials() {
        // Same condition synthToBytes() uses before delegating; anything else would throw there
        return !isBrowser() && Boolean(this.nodeClient);
    }
    /**
     * Get detailed credential validation info
     * @returns Promise resolving to `{ valid, message, details }`; `valid` matches checkCredentials()
     */
    async checkCredentialsAdvanced() {
        const inBrowser = isBrowser();
        // Mirror the delegation guard in synthToBytes() so the report matches real behavior
        const available = !inBrowser && Boolean(this.nodeClient);
        return {
            valid: available,
            message: available
                ? "eSpeak WASM is available with environment-specific fallbacks"
                : ESPEAK_WASM_BROWSER_DISABLED_MESSAGE,
            details: {
                environment: inBrowser ? "browser" : "node",
                engine: inBrowser ? "meSpeak" : "text2wav",
                note: "Credentials not required for eSpeak"
            },
        };
    }
}
// Backward compatibility export: also expose EspeakBrowserTTSClient under the alias EspeakWasmTTSClient
export { EspeakBrowserTTSClient as EspeakWasmTTSClient };