UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

147 lines (146 loc) · 6.67 kB
import { AbstractTTSClient } from "../core/abstract-tts.js";
import { EspeakNodeTTSClient } from "./espeak.js";

/**
 * Detect whether we are running in a browser environment.
 * @returns {boolean} true when both `window` and `document` globals exist
 */
function isBrowser() {
  const hasWindow = typeof window !== "undefined";
  const hasDocument = typeof document !== "undefined";
  return hasWindow && hasDocument;
}

// Removed meSpeak interface - no longer used

/**
 * eSpeak TTS client for browser environments using meSpeak.js
 * This provides eSpeak functionality in browsers and Node.js via WebAssembly
 * For Node.js-only environments with better performance, use EspeakNodeTTSClient instead.
 */
export class EspeakBrowserTTSClient extends AbstractTTSClient {
  /**
   * @param {object} [credentials] - Passed through to the base class; eSpeak itself needs none.
   */
  constructor(credentials = {}) {
    super(credentials);
    // Node.js fallback client; stays undefined in browsers.
    this.nodeClient = undefined;
    // Set a default voice for eSpeak TTS
    this.voiceId = "en"; // Default English voice
    // In Node.js environments, create a fallback client
    if (!isBrowser()) {
      this.nodeClient = new EspeakNodeTTSClient(credentials);
    }
  }

  /**
   * Synthesize text to audio bytes.
   * In Node.js this delegates to the regular eSpeak client; in browsers it
   * currently throws because meSpeak support is disabled.
   * @param {string} text - Text to synthesize
   * @param {object} [options] - Synthesis options
   * @returns {Promise<Uint8Array>} Audio bytes from the Node.js eSpeak client
   * @throws {Error} when called in a browser environment
   */
  async synthToBytes(text, options) {
    // For Node.js environments, delegate to the regular eSpeak client
    if (!isBrowser() && this.nodeClient) {
      console.log("eSpeak-WASM: Delegating to Node.js eSpeak client");
      return await this.nodeClient.synthToBytes(text, options);
    }
    // Browser environment - throw error for now since meSpeak is causing issues
    throw new Error("eSpeak-WASM browser support is currently disabled due to meSpeak compatibility issues. Use EspeakNodeTTSClient for Node.js environments.");
  }

  /**
   * Synthesize text to a byte stream (ReadableStream)
   * @param text Text to synthesize
   * @param options Synthesis options
   * @returns Promise resolving to an object containing the audio stream and an empty word boundaries array.
   */
  async synthToBytestream(text, options) {
    const fullAudio = await this.synthToBytes(text, options);
    // "Fake" streaming by wrapping full audio in a ReadableStream
    const stream = new ReadableStream({
      start(controller) {
        controller.enqueue(fullAudio);
        controller.close();
      },
    });
    return { audioStream: stream, wordBoundaries: [] };
  }

  /**
   * Return available voices for eSpeak WASM
   */
  async _getVoices() {
    // For Node.js environments, delegate to the regular eSpeak client
    if (!isBrowser() && this.nodeClient) {
      const delegated = await this.nodeClient._getVoices();
      // Rename them to indicate they're from eSpeak WASM (but actually using Node.js fallback)
      return delegated.map((entry) => ({
        ...entry,
        name: entry.name.replace('(eSpeak)', '(eSpeak WASM)'),
      }));
    }
    // meSpeak supports many languages, here's a subset of common ones.
    // Each tuple is [voice id, display name, language label].
    const voiceTable = [
      ["en", "English", "English"],
      ["en-us", "English (US)", "English"],
      ["en-rp", "English (RP)", "English"],
      ["en-sc", "English (Scottish)", "English"],
      ["es", "Spanish", "Spanish"],
      ["es-la", "Spanish (Latin America)", "Spanish"],
      ["fr", "French", "French"],
      ["de", "German", "German"],
      ["it", "Italian", "Italian"],
      ["pt", "Portuguese (Brazil)", "Portuguese"],
      ["pt-pt", "Portuguese (European)", "Portuguese"],
      ["ru", "Russian", "Russian"],
      ["zh", "Chinese (Mandarin)", "Chinese"],
      ["zh-yue", "Chinese (Cantonese)", "Chinese"],
      ["ja", "Japanese", "Japanese"],
      ["ko", "Korean", "Korean"],
      ["ar", "Arabic", "Arabic"],
      ["hi", "Hindi", "Hindi"],
      ["nl", "Dutch", "Dutch"],
      ["sv", "Swedish", "Swedish"],
      ["da", "Danish", "Danish"],
      ["no", "Norwegian", "Norwegian"],
      ["fi", "Finnish", "Finnish"],
      ["pl", "Polish", "Polish"],
      ["cs", "Czech", "Czech"],
      ["hu", "Hungarian", "Hungarian"],
      ["tr", "Turkish", "Turkish"],
      ["he", "Hebrew", "Hebrew"],
      ["th", "Thai", "Thai"],
      ["vi", "Vietnamese", "Vietnamese"],
    ];
    return voiceTable.map(([id, label, language]) => ({
      id,
      name: `${label} (eSpeak WASM)`,
      gender: "Unknown", // meSpeak doesn't typically provide gender info
      provider: "espeak-ng",
      languageCodes: [
        {
          bcp47: id.split("-")[0], // Use the base language code
          iso639_3: "", // Would need mapping
          display: language,
        },
      ],
    }));
  }

  /**
   * Get the list of required credential types for this engine
   * @returns Array of required credential field names
   */
  getRequiredCredentials() {
    return []; // eSpeak doesn't require any credentials
  }

  /**
   * Check if credentials are valid (eSpeak doesn't need credentials)
   */
  async checkCredentials() {
    // eSpeak doesn't need credentials and we have fallbacks for both environments
    return true;
  }

  /**
   * Get detailed credential validation info
   */
  async checkCredentialsAdvanced() {
    const inBrowser = isBrowser();
    return {
      valid: true,
      message: "eSpeak WASM is available with environment-specific fallbacks",
      details: {
        environment: inBrowser ? "browser" : "node",
        engine: inBrowser ? "meSpeak" : "text2wav",
        note: "Credentials not required for eSpeak",
      },
    };
  }
}

// Backward compatibility export
export { EspeakBrowserTTSClient as EspeakWasmTTSClient };