UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

304 lines (303 loc) 10.8 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.ModelsLabTTSClient = void 0; const abstract_tts_1 = require("../core/abstract-tts"); const SSMLUtils = __importStar(require("../core/ssml-utils")); const SpeechMarkdown = __importStar(require("../markdown/converter")); const fetch_utils_1 = require("../utils/fetch-utils"); /** Static list of available voices */ const MODELSLAB_VOICES = [ // Emotion-capable female voices { id: "madison", name: "Madison", gender: "Female", provider: "modelslab", languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], }, { id: "tara", name: "Tara", gender: "Female", provider: "modelslab", languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], }, { id: "leah", name: "Leah", gender: "Female", provider: "modelslab", languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], }, { id: "jess", name: "Jess", gender: "Female", provider: "modelslab", languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], }, { id: "mia", name: "Mia", gender: "Female", provider: "modelslab", languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], }, { id: "zoe", name: "Zoe", gender: "Female", provider: "modelslab", languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], }, // Emotion-capable male voices { id: "leo", name: "Leo", gender: "Male", provider: "modelslab", languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], }, { id: "dan", name: "Dan", gender: "Male", provider: "modelslab", languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], }, { id: "zac", name: "Zac", gender: "Male", provider: "modelslab", languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }], }, ]; const API_URL = "https://modelslab.com/api/v6/voice/text_to_speech"; const DEFAULT_VOICE = "madison"; const DEFAULT_LANGUAGE = "american english"; const POLL_INTERVAL_MS = 2000; const MAX_POLL_ATTEMPTS = 20; /** * ModelsLab TTS Client * * Provides text-to-speech via the ModelsLab Voice API. * API docs: https://docs.modelslab.com/voice-cloning/text-to-speech * * @example * ```ts * const client = new ModelsLabTTSClient({ apiKey: "your-api-key" }); * await client.synthToFile("Hello world!", "output.mp3"); * ``` */ class ModelsLabTTSClient extends abstract_tts_1.AbstractTTSClient { constructor(credentials = {}) { super(credentials); Object.defineProperty(this, "apiKey", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "defaultLanguage", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "defaultSpeed", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "sampleRate", { enumerable: true, configurable: true, writable: true, value: 24000 }); this._models = [{ id: "modelslab", features: [] }]; this.apiKey = credentials.apiKey || (typeof process !== "undefined" ? (process.env.MODELSLAB_API_KEY ?? "") : ""); this.defaultLanguage = DEFAULT_LANGUAGE; this.defaultSpeed = 1.0; if (!this.voiceId) { this.voiceId = DEFAULT_VOICE; } } /** Check if credentials are present */ async checkCredentials() { if (!this.apiKey) { console.error("ModelsLab API key is required. Set MODELSLAB_API_KEY or pass apiKey."); return false; } return true; } getRequiredCredentials() { return ["apiKey"]; } async _getVoices() { return MODELSLAB_VOICES; } /** * Synthesize text to audio bytes (Uint8Array). * Handles async generation — polls until audio is ready. */ async synthToBytes(text, options = {}) { const { audioStream } = await this.synthToBytestream(text, options); const reader = audioStream.getReader(); const chunks = []; while (true) { const { done, value } = await reader.read(); if (done) break; chunks.push(value); } const totalLen = chunks.reduce((n, c) => n + c.length, 0); const out = new Uint8Array(totalLen); let offset = 0; for (const chunk of chunks) { out.set(chunk, offset); offset += chunk.length; } return out; } /** * Synthesize text to a ReadableStream of audio chunks. */ async synthToBytestream(text, options = {}) { let processedText = text; // Convert SpeechMarkdown → SSML → plain text if needed if (options.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) { const ssml = await SpeechMarkdown.toSSML(processedText); processedText = SSMLUtils.stripSSML(ssml); } else if (SSMLUtils.isSSML(processedText)) { // ModelsLab doesn't support SSML — strip tags processedText = SSMLUtils.stripSSML(processedText); } const voiceId = options.voice || this.voiceId || DEFAULT_VOICE; this.voiceId = voiceId; const speed = options.speed ?? this.defaultSpeed; const language = options.language ?? this.defaultLanguage; const audioBytes = await this._synthesize(processedText, voiceId, language, speed, options.emotion ?? false); const audioStream = new ReadableStream({ start(controller) { controller.enqueue(audioBytes); controller.close(); }, }); return { audioStream, wordBoundaries: [] }; } /** Internal: call ModelsLab API and return audio bytes. */ async _synthesize(text, voiceId, language, speed, emotion) { const fetch = (0, fetch_utils_1.getFetch)(); const resp = await fetch(API_URL, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ key: this.apiKey, prompt: text, language, voice_id: voiceId, speed, emotion, }), }); if (!resp.ok) { throw new Error(`ModelsLab API error: ${resp.status} ${resp.statusText}`); } const data = (await resp.json()); if (data.status === "error") { throw new Error(`ModelsLab TTS error: ${data.message ?? JSON.stringify(data)}`); } let audioUrl; if (data.status === "success" && data.output?.length) { audioUrl = data.output[0]; } else if (data.status === "processing") { const fetchUrl = data.fetch_result ?? data.link; if (!fetchUrl) { throw new Error("ModelsLab returned processing status with no fetch URL"); } audioUrl = await this._poll(fetchUrl, fetch); } else { throw new Error(`Unexpected ModelsLab status: ${data.status}`); } if (!audioUrl) { throw new Error("ModelsLab returned no audio URL"); } return this._downloadAudio(audioUrl, fetch); } /** Poll the fetch_result URL until audio is ready. */ async _poll(fetchUrl, fetch) { for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt++) { await this._sleep(POLL_INTERVAL_MS); const resp = await fetch(fetchUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ key: this.apiKey }), }); if (!resp.ok) continue; const data = (await resp.json()); if (data.status === "success" && data.output?.length) { return data.output[0]; } if (data.status === "error") { throw new Error(`ModelsLab poll error: ${data.message}`); } } throw new Error(`ModelsLab audio generation timed out after ${MAX_POLL_ATTEMPTS} attempts`); } /** Download audio from URL and return as Uint8Array. */ async _downloadAudio(url, fetch) { const resp = await fetch(url); if (!resp.ok) { throw new Error(`Failed to download audio: ${resp.status} ${resp.statusText}`); } const buf = await resp.arrayBuffer(); return new Uint8Array(buf); } _sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } } exports.ModelsLabTTSClient = ModelsLabTTSClient; exports.default = ModelsLabTTSClient;