UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

github.com/willwade/js-tts-wrapper

willwade/js-tts-wrapper

242 lines (241 loc) • 10.7 kB

JavaScript

"use strict";
// ---------------------------------------------------------------------------
// TypeScript-emitted CommonJS interop helpers. Kept verbatim: __importStar
// wraps a required module in a namespace-like object with a `default` member.
// ---------------------------------------------------------------------------
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.EspeakWasmTTSClient = exports.EspeakBrowserTTSClient = void 0;
const abstract_tts_1 = require("../core/abstract-tts");

// Cached meSpeak module; populated by the first successful loadMeSpeak() call.
let meSpeak = null;

/**
 * Load the meSpeak module, caching it for subsequent calls.
 *
 * - When `window` is defined, the module is taken from `window.meSpeak`
 *   (it must already have been loaded globally).
 * - Otherwise the `mespeak` package is required dynamically; if the loaded
 *   module has a truthy `default` member, that member is used.
 *
 * @returns {Promise<object>} The meSpeak module.
 * @throws {Error} "meSpeak package not found. ..." when the module cannot be
 *   located; the underlying error is attached as `cause`.
 */
async function loadMeSpeak() {
    if (meSpeak) {
        return meSpeak;
    }
    try {
        if (typeof window !== "undefined") {
            // Browser environment - meSpeak should be loaded globally
            if (window.meSpeak) {
                meSpeak = window.meSpeak;
                return meSpeak;
            }
            throw new Error("meSpeak is not loaded. Please include meSpeak.js in your HTML or install the mespeak package.");
        }
        // Detect Next.js environment (only used to pick a more helpful error message)
        const isNextJS = typeof process !== "undefined" && (process.env.NEXT_RUNTIME || process.env.__NEXT_PRIVATE_ORIGIN);
        try {
            // The module name is routed through a template literal / promise so
            // that bundlers do not try to resolve "mespeak" statically.
            meSpeak = await Promise.resolve(`${"mespeak"}`).then(s => __importStar(require(s)));
            // Handle both default and named exports
            if (meSpeak.default) {
                meSpeak = meSpeak.default;
            }
            return meSpeak;
        }
        catch (importError) {
            if (isNextJS) {
                throw new Error("mespeak package not found in Next.js environment. " +
                    "This may be due to Next.js bundling restrictions. " +
                    "For browser environments, include meSpeak.js in your HTML. " +
                    "For Node.js environments, ensure mespeak is properly installed: npm install mespeak");
            }
            throw importError;
        }
    }
    catch (err) {
        console.error("Error loading meSpeak:", err);
        const errorMessage = err instanceof Error ? err.message : String(err);
        // `cause` keeps the original error (and its stack) reachable for callers.
        throw new Error(`meSpeak package not found. ${errorMessage}. Please install it with: npm install mespeak`, { cause: err });
    }
}

/**
 * Clamp a numeric option to [inMin, inMax] and rescale it linearly onto
 * [outMin, outMax], rounded to the nearest integer.
 *
 * @param {number|string|undefined} value Raw option value; strings are parsed with parseFloat.
 * @param {number} inMin Lower bound of the accepted input range.
 * @param {number} inMax Upper bound of the accepted input range.
 * @param {number} outMin Output value corresponding to inMin.
 * @param {number} outMax Output value corresponding to inMax.
 * @returns {number|undefined} The scaled integer, or undefined when the value is
 *   missing/falsy or not numeric (so the caller leaves meSpeak's default in place).
 */
function scaleOption(value, inMin, inMax, outMin, outMax) {
    if (!value) {
        return undefined;
    }
    const parsed = typeof value === "string" ? Number.parseFloat(value) : value;
    // Non-numeric input (e.g. a named preset string) would otherwise turn into
    // NaN and be handed to meSpeak; skip it instead.
    if (typeof parsed !== "number" || Number.isNaN(parsed)) {
        return undefined;
    }
    const clamped = Math.max(inMin, Math.min(inMax, parsed));
    return Math.round(outMin + ((clamped - inMin) * (outMax - outMin)) / (inMax - inMin));
}

// Subset of languages offered by _getVoices(); `id` is the meSpeak voice id.
const COMMON_VOICES = [
    { id: "en", name: "English", language: "English" },
    { id: "en-us", name: "English (US)", language: "English" },
    { id: "en-rp", name: "English (RP)", language: "English" },
    { id: "en-sc", name: "English (Scottish)", language: "English" },
    { id: "es", name: "Spanish", language: "Spanish" },
    { id: "es-la", name: "Spanish (Latin America)", language: "Spanish" },
    { id: "fr", name: "French", language: "French" },
    { id: "de", name: "German", language: "German" },
    { id: "it", name: "Italian", language: "Italian" },
    { id: "pt", name: "Portuguese (Brazil)", language: "Portuguese" },
    { id: "pt-pt", name: "Portuguese (European)", language: "Portuguese" },
    { id: "ru", name: "Russian", language: "Russian" },
    { id: "zh", name: "Chinese (Mandarin)", language: "Chinese" },
    { id: "zh-yue", name: "Chinese (Cantonese)", language: "Chinese" },
    { id: "ja", name: "Japanese", language: "Japanese" },
    { id: "ko", name: "Korean", language: "Korean" },
    { id: "ar", name: "Arabic", language: "Arabic" },
    { id: "hi", name: "Hindi", language: "Hindi" },
    { id: "nl", name: "Dutch", language: "Dutch" },
    { id: "sv", name: "Swedish", language: "Swedish" },
    { id: "da", name: "Danish", language: "Danish" },
    { id: "no", name: "Norwegian", language: "Norwegian" },
    { id: "fi", name: "Finnish", language: "Finnish" },
    { id: "pl", name: "Polish", language: "Polish" },
    { id: "cs", name: "Czech", language: "Czech" },
    { id: "hu", name: "Hungarian", language: "Hungarian" },
    { id: "tr", name: "Turkish", language: "Turkish" },
    { id: "he", name: "Hebrew", language: "Hebrew" },
    { id: "th", name: "Thai", language: "Thai" },
    { id: "vi", name: "Vietnamese", language: "Vietnamese" },
];

/**
 * eSpeak TTS client for browser environments using meSpeak.js
 * This provides eSpeak functionality in browsers and Node.js via WebAssembly
 * For Node.js-only environments with better performance, use EspeakNodeTTSClient instead.
 */
class EspeakBrowserTTSClient extends abstract_tts_1.AbstractTTSClient {
    /**
     * @param {object} [credentials={}] Passed through to the base client; eSpeak itself needs none.
     */
    constructor(credentials = {}) {
        super(credentials);
        // Default English voice
        this.voiceId = "en";
    }

    /**
     * Synthesize text to raw audio bytes via meSpeak.
     *
     * @param {string} text Text to synthesize.
     * @param {object} [options] Synthesis options. Reads `voice`, `rate`
     *   (0.1-10, mapped linearly to 50-400 words per minute) and `pitch`
     *   (0.1-2, mapped linearly to 0-99). Non-numeric rate/pitch values are
     *   ignored so meSpeak's defaults apply.
     * @returns {Promise<Uint8Array>} The audio data handed back by meSpeak.
     * @throws {Error} "Failed to synthesize speech with eSpeak WASM: ..." on any
     *   failure (module loading or synthesis); the original error is the `cause`.
     */
    async synthToBytes(text, options) {
        try {
            const meSpeakModule = await loadMeSpeak();
            const meSpeakOptions = {
                rawdata: true, // Get raw audio data instead of playing
                // Use voice from options or the default voice
                voice: options?.voice || this.voiceId || "en",
            };
            const speed = scaleOption(options?.rate, 0.1, 10, 50, 400);
            if (speed !== undefined) {
                meSpeakOptions.speed = speed;
            }
            const pitch = scaleOption(options?.pitch, 0.1, 2, 0, 99);
            if (pitch !== undefined) {
                meSpeakOptions.pitch = pitch;
            }
            // `await` is required here: without it a rejection from the callback
            // would escape this try/catch and never be logged or wrapped.
            return await new Promise((resolve, reject) => {
                meSpeakModule.speak(text, meSpeakOptions, (success, _id, stream) => {
                    if (success && stream) {
                        resolve(new Uint8Array(stream));
                    }
                    else {
                        reject(new Error("Failed to synthesize speech with meSpeak"));
                    }
                });
            });
        }
        catch (err) {
            console.error("eSpeak WASM TTS synthesis error:", err);
            throw new Error(`Failed to synthesize speech with eSpeak WASM: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
        }
    }

    /**
     * Synthesize text to a byte stream (ReadableStream)
     * @param text Text to synthesize
     * @param options Synthesis options
     * @returns Promise resolving to an object containing the audio stream and an empty word boundaries array.
     */
    async synthToBytestream(text, options) {
        const audioBytes = await this.synthToBytes(text, options);
        // "Fake" streaming: the full audio is produced first, then emitted as a single chunk.
        const audioStream = new ReadableStream({
            start(controller) {
                controller.enqueue(audioBytes);
                controller.close();
            },
        });
        return { audioStream, wordBoundaries: [] };
    }

    /**
     * Return available voices for eSpeak WASM
     * @returns {Promise<object[]>} One entry per voice in the built-in list, with
     *   `id`, `name`, `gender`, `provider` and a single-element `languageCodes` array.
     */
    async _getVoices() {
        return COMMON_VOICES.map((voice) => ({
            id: voice.id,
            name: `${voice.name} (eSpeak WASM)`,
            gender: "Unknown", // meSpeak doesn't typically provide gender info
            provider: "espeak-ng",
            languageCodes: [
                {
                    bcp47: voice.id.split("-")[0], // Use the base language code
                    iso639_3: "", // Would need mapping
                    display: voice.language,
                },
            ],
        }));
    }

    /**
     * Check if credentials are valid (eSpeak doesn't need credentials)
     * @returns {Promise<boolean>} true when the meSpeak module can be loaded, false otherwise.
     */
    async checkCredentials() {
        try {
            await loadMeSpeak();
            return true;
        }
        catch {
            // Deliberate best-effort: loadMeSpeak already logs the failure.
            return false;
        }
    }

    /**
     * Get detailed credential validation info
     * @returns {Promise<object>} `{ valid, message, details? }`; `details` carries the
     *   module's `version` (or "unknown") and the detected environment.
     */
    async checkCredentialsAdvanced() {
        try {
            const meSpeakModule = await loadMeSpeak();
            return {
                valid: true,
                message: "eSpeak WASM is available and ready to use",
                details: {
                    version: meSpeakModule.version || "unknown",
                    environment: typeof window !== "undefined" ? "browser" : "node",
                },
            };
        }
        catch (err) {
            return {
                valid: false,
                message: `eSpeak WASM not available: ${err instanceof Error ? err.message : String(err)}`,
            };
        }
    }
}
exports.EspeakBrowserTTSClient = EspeakBrowserTTSClient;
// Alias: the same class is exported under both names.
exports.EspeakWasmTTSClient = EspeakBrowserTTSClient;