js-tts-wrapper
Version:
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
242 lines (241 loc) • 10.7 kB
JavaScript
/**
 * TypeScript interop helper: exposes property `k` of module `m` on object `o`
 * under the name `k2` (falls back to `k` when `k2` is undefined).
 *
 * An already-registered `this.__createBinding` is reused when present. Otherwise the
 * implementation is chosen once, depending on whether `Object.create` exists.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // No Object.create: plain one-time value copy
        return function (o, m, k, k2) {
            o[k2 === undefined ? k : k2] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var exportName = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var useLiveGetter;
        if (!descriptor) {
            useLiveGetter = true;
        }
        else if ("get" in descriptor) {
            // An accessor is only reused as-is when the source is flagged __esModule
            useLiveGetter = !m.__esModule;
        }
        else {
            // A data property that can still change is read through a getter
            useLiveGetter = descriptor.writable || descriptor.configurable;
        }
        if (useLiveGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, exportName, descriptor);
    };
})();
/**
 * TypeScript interop helper: stores `v` as the `default` member of `o`.
 *
 * An already-registered `this.__setModuleDefault` is reused when present. With
 * `Object.create` available the member is defined as an enumerable property via
 * `Object.defineProperty`; otherwise it is assigned directly.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            var descriptor = { enumerable: true, value: v };
            Object.defineProperty(o, "default", descriptor);
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
/**
 * TypeScript interop helper: wraps a required module in a namespace-like object.
 *
 * A module flagged `__esModule` is returned untouched. For anything else a new object
 * is built: every own property except "default" is bound with `__createBinding`, and
 * the module itself is installed as `default` via `__setModuleDefault`.
 * An already-registered `this.__importStar` is reused when present.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Replaces itself on first use with Object.getOwnPropertyNames, or with a
    // for-in/hasOwnProperty fallback when that function is missing.
    var listOwnKeys = function (target) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(target);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var result = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                var key = keys[idx];
                if (key !== "default") {
                    __createBinding(result, mod, key);
                }
            }
        }
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.EspeakWasmTTSClient = exports.EspeakBrowserTTSClient = void 0;
const abstract_tts_1 = require("../core/abstract-tts");
// Module-level cache for the resolved meSpeak module (null until a load succeeds)
let meSpeak = null;
/**
 * Resolve the meSpeak module and cache it for subsequent calls.
 *
 * - When `window` is defined, the module is taken from `window.meSpeak`.
 * - Otherwise the "mespeak" package is required at runtime; if the loaded module
 *   has a truthy `default` member, that member is used.
 *
 * @returns Promise resolving to the meSpeak module
 * @throws Error Any failure is logged with console.error and rethrown as an Error
 *   whose message starts with "meSpeak package not found."
 */
async function loadMeSpeak() {
    if (meSpeak) {
        return meSpeak;
    }
    try {
        const inBrowser = typeof window !== "undefined";
        if (inBrowser) {
            // Browser environment - meSpeak is expected as a global
            if (!window.meSpeak) {
                throw new Error("meSpeak is not loaded. Please include meSpeak.js in your HTML or install the mespeak package.");
            }
            meSpeak = window.meSpeak;
            return meSpeak;
        }
        // Next.js is detected through its environment variables; this only selects
        // the error message used when the package cannot be loaded
        const isNextJS = typeof process !== "undefined" &&
            (process.env.NEXT_RUNTIME || process.env.__NEXT_PRIVATE_ORIGIN);
        try {
            // The package name is produced at runtime rather than written as a literal specifier
            const packageName = await Promise.resolve(`${"mespeak"}`);
            meSpeak = __importStar(require(packageName));
            // Handle both default and named exports
            const defaultExport = meSpeak.default;
            if (defaultExport) {
                meSpeak = defaultExport;
            }
            return meSpeak;
        }
        catch (importError) {
            if (!isNextJS) {
                throw importError;
            }
            throw new Error("mespeak package not found in Next.js environment. " +
                "This may be due to Next.js bundling restrictions. " +
                "For browser environments, include meSpeak.js in your HTML. " +
                "For Node.js environments, ensure mespeak is properly installed: npm install mespeak");
        }
    }
    catch (loadError) {
        console.error("Error loading meSpeak:", loadError);
        const reason = loadError instanceof Error ? loadError.message : String(loadError);
        throw new Error(`meSpeak package not found. ${reason}. Please install it with: npm install mespeak`);
    }
}
/**
 * eSpeak TTS client for browser environments using meSpeak.js
 * This provides eSpeak functionality in browsers and Node.js via WebAssembly
 * For Node.js-only environments with better performance, use EspeakNodeTTSClient instead.
 *
 * The meSpeak module is resolved lazily through loadMeSpeak() on each call that needs it,
 * so constructing a client never fails because meSpeak is missing.
 */
class EspeakBrowserTTSClient extends abstract_tts_1.AbstractTTSClient {
    /**
     * @param credentials Forwarded unchanged to the base client; defaults to an empty object.
     */
    constructor(credentials = {}) {
        super(credentials);
        // Set a default voice for eSpeak TTS
        this.voiceId = "en"; // Default English voice
    }
    /**
     * Synthesize text to audio bytes with meSpeak.
     * @param text Text to synthesize
     * @param options Optional synthesis options; `voice`, `rate` and `pitch` are read here.
     *   `rate` and `pitch` may be numbers or numeric strings. Values that do not yield a
     *   number are ignored so meSpeak falls back to its own default.
     * @returns Promise resolving to a Uint8Array built from the data meSpeak hands to its callback
     * @throws Error with a "Failed to synthesize speech with eSpeak WASM: ..." message when
     *   meSpeak cannot be loaded, throws while speaking, or reports failure through its callback
     */
    async synthToBytes(text, options) {
        try {
            // Load the meSpeak module
            const meSpeakModule = await loadMeSpeak();
            // Prepare options for meSpeak
            const meSpeakOptions = {
                rawdata: true, // Get raw audio data instead of playing
                // Use voice from options or the default voice
                voice: options?.voice || this.voiceId || "en",
            };
            // Map other options to meSpeak format
            if (options?.rate) {
                // meSpeak uses speed in words per minute, default is 175
                // Convert from rate (0.1-10) to WPM (50-400)
                // NOTE(review): this linear mapping sends rate 1 to 82 WPM, well below the
                // default quoted above - confirm that this is intended.
                const rateNum = typeof options.rate === "string" ? Number.parseFloat(options.rate) : options.rate;
                const rate = Math.max(0.1, Math.min(10, rateNum));
                const speed = Math.round(50 + ((rate - 0.1) * (400 - 50)) / (10 - 0.1));
                // A non-numeric rate (e.g. a keyword string) produces NaN; never forward that to meSpeak
                if (!Number.isNaN(speed)) {
                    meSpeakOptions.speed = speed;
                }
            }
            if (options?.pitch) {
                // meSpeak uses pitch 0-99, default is 50
                // Convert from pitch (0.1-2) to 0-99
                const pitchNum = typeof options.pitch === "string" ? Number.parseFloat(options.pitch) : options.pitch;
                const clampedPitch = Math.max(0.1, Math.min(2, pitchNum));
                const pitch = Math.round(((clampedPitch - 0.1) * 99) / (2 - 0.1));
                // Same NaN guard as for the rate
                if (!Number.isNaN(pitch)) {
                    meSpeakOptions.pitch = pitch;
                }
            }
            // Call meSpeak to generate audio with a callback.
            // The promise is awaited inside the try block so that a failed callback or a
            // synchronous throw from speak() goes through the catch below, just like load errors.
            return await new Promise((resolve, reject) => {
                meSpeakModule.speak(text, meSpeakOptions, (success, _id, stream) => {
                    if (success && stream) {
                        resolve(new Uint8Array(stream));
                    }
                    else {
                        reject(new Error("Failed to synthesize speech with meSpeak"));
                    }
                });
            });
        }
        catch (err) {
            console.error("eSpeak WASM TTS synthesis error:", err);
            throw new Error(`Failed to synthesize speech with eSpeak WASM: ${err instanceof Error ? err.message : String(err)}`);
        }
    }
    /**
     * Synthesize text to a byte stream (ReadableStream)
     * @param text Text to synthesize
     * @param options Synthesis options
     * @returns Promise resolving to an object containing the audio stream and an empty word boundaries array.
     */
    async synthToBytestream(text, options) {
        const audioBytes = await this.synthToBytes(text, options);
        // "Fake" streaming by wrapping full audio in a ReadableStream:
        // the stream emits the complete buffer as a single chunk and then closes
        const audioStream = new ReadableStream({
            start(controller) {
                controller.enqueue(audioBytes);
                controller.close();
            },
        });
        return { audioStream, wordBoundaries: [] };
    }
    /**
     * Return available voices for eSpeak WASM
     *
     * The list is a fixed, hard-coded subset; meSpeak is not queried.
     * @returns Promise resolving to voice descriptors with `id`, `name`, `gender`,
     *   `provider` and a single-entry `languageCodes` array
     */
    async _getVoices() {
        // meSpeak supports many languages, here's a subset of common ones
        const commonVoices = [
            { id: "en", name: "English", language: "English" },
            { id: "en-us", name: "English (US)", language: "English" },
            { id: "en-rp", name: "English (RP)", language: "English" },
            { id: "en-sc", name: "English (Scottish)", language: "English" },
            { id: "es", name: "Spanish", language: "Spanish" },
            { id: "es-la", name: "Spanish (Latin America)", language: "Spanish" },
            { id: "fr", name: "French", language: "French" },
            { id: "de", name: "German", language: "German" },
            { id: "it", name: "Italian", language: "Italian" },
            { id: "pt", name: "Portuguese (Brazil)", language: "Portuguese" },
            { id: "pt-pt", name: "Portuguese (European)", language: "Portuguese" },
            { id: "ru", name: "Russian", language: "Russian" },
            { id: "zh", name: "Chinese (Mandarin)", language: "Chinese" },
            { id: "zh-yue", name: "Chinese (Cantonese)", language: "Chinese" },
            { id: "ja", name: "Japanese", language: "Japanese" },
            { id: "ko", name: "Korean", language: "Korean" },
            { id: "ar", name: "Arabic", language: "Arabic" },
            { id: "hi", name: "Hindi", language: "Hindi" },
            { id: "nl", name: "Dutch", language: "Dutch" },
            { id: "sv", name: "Swedish", language: "Swedish" },
            { id: "da", name: "Danish", language: "Danish" },
            { id: "no", name: "Norwegian", language: "Norwegian" },
            { id: "fi", name: "Finnish", language: "Finnish" },
            { id: "pl", name: "Polish", language: "Polish" },
            { id: "cs", name: "Czech", language: "Czech" },
            { id: "hu", name: "Hungarian", language: "Hungarian" },
            { id: "tr", name: "Turkish", language: "Turkish" },
            { id: "he", name: "Hebrew", language: "Hebrew" },
            { id: "th", name: "Thai", language: "Thai" },
            { id: "vi", name: "Vietnamese", language: "Vietnamese" },
        ];
        const voices = commonVoices.map((voice) => ({
            id: voice.id,
            name: `${voice.name} (eSpeak WASM)`,
            gender: "Unknown", // meSpeak doesn't typically provide gender info
            provider: "espeak-ng",
            languageCodes: [
                {
                    bcp47: voice.id.split("-")[0], // Use the base language code
                    iso639_3: "", // Would need mapping
                    display: voice.language,
                },
            ],
        }));
        return voices;
    }
    /**
     * Check if credentials are valid (eSpeak doesn't need credentials)
     *
     * The check only verifies that the meSpeak module can be loaded.
     * @returns Promise resolving to true when loadMeSpeak() succeeds, false otherwise
     */
    async checkCredentials() {
        try {
            await loadMeSpeak();
            return true;
        }
        catch {
            // Deliberate: an unavailable module is reported as "not valid" instead of throwing
            return false;
        }
    }
    /**
     * Get detailed credential validation info
     * @returns Promise resolving to `{ valid, message }`; on success it also carries
     *   `details` with the module's `version` (or "unknown") and the detected environment
     */
    async checkCredentialsAdvanced() {
        try {
            const meSpeakModule = await loadMeSpeak();
            return {
                valid: true,
                message: "eSpeak WASM is available and ready to use",
                details: {
                    version: meSpeakModule.version || "unknown",
                    environment: typeof window !== "undefined" ? "browser" : "node",
                },
            };
        }
        catch (err) {
            return {
                valid: false,
                message: `eSpeak WASM not available: ${err instanceof Error ? err.message : String(err)}`,
            };
        }
    }
}
// Publish the client class under its primary name
exports.EspeakBrowserTTSClient = EspeakBrowserTTSClient;
// EspeakWasmTTSClient is a second export name for the very same class
exports.EspeakWasmTTSClient = EspeakBrowserTTSClient;
;