UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

267 lines (266 loc) 9.24 kB
import { AbstractTTSClient } from "../core/abstract-tts.js";
import * as SSMLUtils from "../core/ssml-utils.js";
import * as SpeechMarkdown from "../markdown/converter.js";
import { getFetch } from "../utils/fetch-utils.js";

/**
 * Build one voice descriptor. Every voice in the static list shares the same
 * provider and the same single en-US language entry; each descriptor gets its
 * own `languageCodes` array and entry object so no state is shared between voices.
 */
const makeVoice = (id, name, gender) => ({
    id,
    name,
    gender,
    provider: "modelslab",
    languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }],
});

/** Static list of available voices */
const MODELSLAB_VOICES = [
    // Emotion-capable female voices
    makeVoice("madison", "Madison", "Female"),
    makeVoice("tara", "Tara", "Female"),
    makeVoice("leah", "Leah", "Female"),
    makeVoice("jess", "Jess", "Female"),
    makeVoice("mia", "Mia", "Female"),
    makeVoice("zoe", "Zoe", "Female"),
    // Emotion-capable male voices
    makeVoice("leo", "Leo", "Male"),
    makeVoice("dan", "Dan", "Male"),
    makeVoice("zac", "Zac", "Male"),
];

/** Endpoint that receives the synthesis request. */
const API_URL = "https://modelslab.com/api/v6/voice/text_to_speech";
/** Voice used when neither the call options nor the client specify one. */
const DEFAULT_VOICE = "madison";
/** Value sent as the request's `language` field when the caller gives none. */
const DEFAULT_LANGUAGE = "american english";
/** Delay before each poll of a "processing" job, in milliseconds. */
const POLL_INTERVAL_MS = 2000;
/** Maximum number of polls before giving up on a "processing" job. */
const MAX_POLL_ATTEMPTS = 20;

/**
 * ModelsLab TTS Client
 *
 * Provides text-to-speech via the ModelsLab Voice API.
 * API docs: https://docs.modelslab.com/voice-cloning/text-to-speech
 *
 * @example
 * ```ts
 * const client = new ModelsLabTTSClient({ apiKey: "your-api-key" });
 * await client.synthToFile("Hello world!", "output.mp3");
 * ```
 */
export class ModelsLabTTSClient extends AbstractTTSClient {
    /**
     * @param {{ apiKey?: string }} [credentials] - When `apiKey` is missing or
     *   empty, the `MODELSLAB_API_KEY` environment variable is used if a
     *   `process` global exists; otherwise the key is the empty string.
     */
    constructor(credentials = {}) {
        super(credentials);
        // Explicit own-property definitions (presumably compiler output for
        // class fields — kept as-is so property semantics do not change).
        Object.defineProperty(this, "apiKey", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "defaultLanguage", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "defaultSpeed", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "sampleRate", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 24000
        });
        this._models = [{ id: "modelslab", features: [] }];
        this.apiKey =
            credentials.apiKey ||
                (typeof process !== "undefined" ? (process.env.MODELSLAB_API_KEY ?? "") : "");
        this.defaultLanguage = DEFAULT_LANGUAGE;
        this.defaultSpeed = 1.0;
        if (!this.voiceId) {
            this.voiceId = DEFAULT_VOICE;
        }
    }

    /**
     * Check if credentials are present.
     *
     * Only verifies that an API key string is set; no request is made.
     *
     * @returns {Promise<boolean>} `true` when an API key is configured.
     */
    async checkCredentials() {
        if (!this.apiKey) {
            console.error("ModelsLab API key is required. Set MODELSLAB_API_KEY or pass apiKey.");
            return false;
        }
        return true;
    }

    /** @returns {string[]} Names of the credential fields this client needs. */
    getRequiredCredentials() {
        return ["apiKey"];
    }

    /** @returns {Promise<object[]>} The static ModelsLab voice list. */
    async _getVoices() {
        return MODELSLAB_VOICES;
    }

    /**
     * Synthesize text to audio bytes (Uint8Array).
     * Handles async generation — polls until audio is ready.
     *
     * @param {string} text - Text to speak.
     * @param {object} [options] - Same options as {@link synthToBytestream}.
     * @returns {Promise<Uint8Array>} All audio chunks concatenated in order.
     */
    async synthToBytes(text, options = {}) {
        const { audioStream } = await this.synthToBytestream(text, options);
        const reader = audioStream.getReader();
        const chunks = [];
        while (true) {
            const { done, value } = await reader.read();
            if (done)
                break;
            chunks.push(value);
        }
        // Size the output once, then copy each chunk at its running offset.
        const totalLen = chunks.reduce((n, c) => n + c.length, 0);
        const out = new Uint8Array(totalLen);
        let offset = 0;
        for (const chunk of chunks) {
            out.set(chunk, offset);
            offset += chunk.length;
        }
        return out;
    }

    /**
     * Synthesize text to a ReadableStream of audio chunks.
     *
     * The audio is fetched in full first and then enqueued as a single chunk.
     *
     * @param {string} text - Plain text, SSML, or (with `useSpeechMarkdown`) Speech Markdown.
     * @param {object} [options]
     * @param {boolean} [options.useSpeechMarkdown] - Convert Speech Markdown input before synthesis.
     * @param {string} [options.voice] - Voice id; falls back to the client's voice, then the default.
     * @param {number} [options.speed] - Falls back to `defaultSpeed`.
     * @param {string} [options.language] - Falls back to `defaultLanguage`.
     * @param {boolean} [options.emotion] - Sent as the request's `emotion` field; defaults to `false`.
     * @returns {Promise<{ audioStream: ReadableStream, wordBoundaries: Array }>}
     *   `wordBoundaries` is always an empty array.
     */
    async synthToBytestream(text, options = {}) {
        let processedText = text;
        // Convert SpeechMarkdown → SSML → plain text if needed
        if (options.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) {
            const ssml = await SpeechMarkdown.toSSML(processedText);
            processedText = SSMLUtils.stripSSML(ssml);
        }
        else if (SSMLUtils.isSSML(processedText)) {
            // ModelsLab doesn't support SSML — strip tags
            processedText = SSMLUtils.stripSSML(processedText);
        }
        const voiceId = options.voice || this.voiceId || DEFAULT_VOICE;
        // NOTE(review): a per-call `options.voice` also becomes the client's
        // stored voice for subsequent calls — confirm this is intended.
        this.voiceId = voiceId;
        const speed = options.speed ?? this.defaultSpeed;
        const language = options.language ?? this.defaultLanguage;
        const audioBytes = await this._synthesize(processedText, voiceId, language, speed, options.emotion ?? false);
        const audioStream = new ReadableStream({
            start(controller) {
                controller.enqueue(audioBytes);
                controller.close();
            },
        });
        return { audioStream, wordBoundaries: [] };
    }

    /**
     * Internal: call ModelsLab API and return audio bytes.
     *
     * Posts the request, then either takes the first URL in `output`
     * (status "success") or polls the returned fetch URL (status "processing"),
     * and finally downloads the audio from that URL.
     *
     * @param {string} text - Text sent as the request's `prompt` field.
     * @param {string} voiceId - Sent as `voice_id`.
     * @param {string} language - Sent as `language`.
     * @param {number} speed - Sent as `speed`.
     * @param {boolean} emotion - Sent as `emotion`.
     * @returns {Promise<Uint8Array>} The downloaded audio bytes.
     * @throws {Error} On a non-OK HTTP response, an "error" status, an
     *   unrecognised status, a missing fetch URL, or a missing audio URL.
     */
    async _synthesize(text, voiceId, language, speed, emotion) {
        const fetch = getFetch();
        const resp = await fetch(API_URL, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
                key: this.apiKey,
                prompt: text,
                language,
                voice_id: voiceId,
                speed,
                emotion,
            }),
        });
        if (!resp.ok) {
            throw new Error(`ModelsLab API error: ${resp.status} ${resp.statusText}`);
        }
        const data = (await resp.json());
        if (data.status === "error") {
            throw new Error(`ModelsLab TTS error: ${data.message ?? JSON.stringify(data)}`);
        }
        let audioUrl;
        if (data.status === "success") {
            // An empty or missing `output` is reported by the "no audio URL"
            // check below rather than as an "unexpected status".
            audioUrl = data.output?.[0];
        }
        else if (data.status === "processing") {
            const fetchUrl = data.fetch_result ?? data.link;
            if (!fetchUrl) {
                throw new Error("ModelsLab returned processing status with no fetch URL");
            }
            audioUrl = await this._poll(fetchUrl, fetch);
        }
        else {
            throw new Error(`Unexpected ModelsLab status: ${data.status}`);
        }
        if (!audioUrl) {
            throw new Error("ModelsLab returned no audio URL");
        }
        return this._downloadAudio(audioUrl, fetch);
    }

    /**
     * Poll the fetch_result URL until audio is ready.
     *
     * Waits `POLL_INTERVAL_MS` before every attempt, up to `MAX_POLL_ATTEMPTS`
     * attempts. Non-OK HTTP responses are retried rather than treated as fatal.
     *
     * @param {string} fetchUrl - URL to poll.
     * @param {Function} fetch - fetch implementation to use.
     * @returns {Promise<string>} The first URL in the successful response's `output`.
     * @throws {Error} When the API reports an error status, or when all
     *   attempts are used up without a successful result.
     */
    async _poll(fetchUrl, fetch) {
        // Most recent non-OK HTTP response seen, kept only for diagnostics.
        let lastHttpFailure = "";
        for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt++) {
            await this._sleep(POLL_INTERVAL_MS);
            const resp = await fetch(fetchUrl, {
                method: "POST",
                headers: { "Content-Type": "application/json" },
                body: JSON.stringify({ key: this.apiKey }),
            });
            if (!resp.ok) {
                // Best-effort retry, but remember the failure so a timeout
                // caused by repeated HTTP errors is not reported blindly.
                lastHttpFailure = `${resp.status} ${resp.statusText}`;
                continue;
            }
            const data = (await resp.json());
            if (data.status === "success" && data.output?.length) {
                return data.output[0];
            }
            if (data.status === "error") {
                throw new Error(`ModelsLab poll error: ${data.message ?? JSON.stringify(data)}`);
            }
        }
        const detail = lastHttpFailure ? ` (most recent HTTP failure: ${lastHttpFailure})` : "";
        throw new Error(`ModelsLab audio generation timed out after ${MAX_POLL_ATTEMPTS} attempts${detail}`);
    }

    /**
     * Download audio from URL and return as Uint8Array.
     *
     * @param {string} url - Audio file URL.
     * @param {Function} fetch - fetch implementation to use.
     * @returns {Promise<Uint8Array>} The response body bytes.
     * @throws {Error} When the HTTP response is not OK.
     */
    async _downloadAudio(url, fetch) {
        const resp = await fetch(url);
        if (!resp.ok) {
            throw new Error(`Failed to download audio: ${resp.status} ${resp.statusText}`);
        }
        const buf = await resp.arrayBuffer();
        return new Uint8Array(buf);
    }

    /**
     * @param {number} ms - Delay in milliseconds.
     * @returns {Promise<void>} Resolves after the delay.
     */
    _sleep(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }
}

export default ModelsLabTTSClient;