js-tts-wrapper
Version:
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
304 lines (303 loc) • 10.8 kB
JavaScript
;
// TypeScript-emitted interop helper: exposes property `key` of `source` on
// `target` under the name `alias` (defaults to `key`). When property
// descriptors are available the binding is installed with defineProperty,
// using a forwarding getter unless the source property is already an accessor
// on an `__esModule` object or is a frozen (non-writable, non-configurable)
// data property; otherwise the value is simply copied.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether the source descriptor can be reused verbatim or has
        // to be replaced by a getter that reads through to the source.
        var needsGetter = !descriptor ||
            ("get" in descriptor
                ? !source.__esModule
                : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = {
                enumerable: true,
                get: function () {
                    return source[key];
                },
            };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        target[alias] = source[key];
    });
// TypeScript-emitted interop helper: attaches `value` to `target` as its
// "default" property. With descriptor support the property is enumerable and
// defined via defineProperty (so it is neither writable nor configurable);
// the fallback is a plain assignment.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        var defaultDescriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", defaultDescriptor);
    }
    : function (target, value) {
        target["default"] = value;
    });
// TypeScript-emitted interop helper for `import * as ns from "..."`.
// Objects flagged `__esModule` are returned untouched; anything else is
// wrapped in a fresh namespace object that re-exposes every own property
// except "default" and carries the original value as its `default`.
var __importStar = (this && this.__importStar) || (function () {
    // Self-replacing function: the first call picks the key-listing strategy
    // (native getOwnPropertyNames, or a for-in fallback) and rebinds itself.
    var listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (target) {
            var names = [];
            for (var name in target) {
                if (Object.prototype.hasOwnProperty.call(target, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                if (keys[idx] !== "default") {
                    __createBinding(namespace, mod, keys[idx]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.ModelsLabTTSClient = void 0;
const abstract_tts_1 = require("../core/abstract-tts");
const SSMLUtils = __importStar(require("../core/ssml-utils"));
const SpeechMarkdown = __importStar(require("../markdown/converter"));
const fetch_utils_1 = require("../utils/fetch-utils");
/**
 * Static list of available voices.
 *
 * Each row below is `[id, name, gender]`; every voice shares the same
 * provider tag and a single en-US language entry, so the rows are expanded
 * into full voice objects by the trailing `map`.
 */
const MODELSLAB_VOICES = [
    // Emotion-capable female voices
    ["madison", "Madison", "Female"],
    ["tara", "Tara", "Female"],
    ["leah", "Leah", "Female"],
    ["jess", "Jess", "Female"],
    ["mia", "Mia", "Female"],
    ["zoe", "Zoe", "Female"],
    // Emotion-capable male voices
    ["leo", "Leo", "Male"],
    ["dan", "Dan", "Male"],
    ["zac", "Zac", "Male"],
].map(([id, name, gender]) => ({
    id,
    name,
    gender,
    provider: "modelslab",
    // A fresh array/object per voice, so entries never share references.
    languageCodes: [{ bcp47: "en-US", iso639_3: "eng", display: "English (US)" }],
}));
// Endpoint that receives the initial synthesis POST request.
const API_URL = "https://modelslab.com/api/v6/voice/text_to_speech";
// Voice used when neither the call options nor the client specify one.
const DEFAULT_VOICE = "madison";
// Value sent as the request's `language` field unless options.language overrides it.
const DEFAULT_LANGUAGE = "american english";
// Delay before each poll request while a generation is still "processing".
const POLL_INTERVAL_MS = 2000;
// Upper bound on poll requests; with the interval above this is at least
// 40 seconds of waiting (plus request time) before giving up.
const MAX_POLL_ATTEMPTS = 20;
/**
 * ModelsLab TTS Client
 *
 * Provides text-to-speech via the ModelsLab Voice API.
 * API docs: https://docs.modelslab.com/voice-cloning/text-to-speech
 *
 * The API may answer either with the audio URL directly or with a
 * "processing" status plus a URL to poll; both paths are handled here and
 * the resulting audio file is downloaded and returned as bytes.
 *
 * @example
 * ```ts
 * const client = new ModelsLabTTSClient({ apiKey: "your-api-key" });
 * await client.synthToFile("Hello world!", "output.mp3");
 * ```
 */
class ModelsLabTTSClient extends abstract_tts_1.AbstractTTSClient {
    /**
     * @param {object} [credentials] - Credential bag. `credentials.apiKey` is
     *   used when truthy; otherwise the `MODELSLAB_API_KEY` environment
     *   variable is read (when `process` exists), falling back to "".
     */
    constructor(credentials = {}) {
        super(credentials);
        Object.defineProperty(this, "apiKey", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "defaultLanguage", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "defaultSpeed", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "sampleRate", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 24000
        });
        this._models = [{ id: "modelslab", features: [] }];
        this.apiKey =
            credentials.apiKey ||
                (typeof process !== "undefined" ? (process.env.MODELSLAB_API_KEY ?? "") : "");
        this.defaultLanguage = DEFAULT_LANGUAGE;
        this.defaultSpeed = 1.0;
        // Only fall back to the default voice when none is set on the instance yet.
        if (!this.voiceId) {
            this.voiceId = DEFAULT_VOICE;
        }
    }
    /**
     * Check if credentials are present.
     *
     * This is a presence check only; no request is made to validate the key.
     *
     * @returns {Promise<boolean>} `true` when an API key is set, else `false`
     *   (after logging an error to the console).
     */
    async checkCredentials() {
        if (!this.apiKey) {
            console.error("ModelsLab API key is required. Set MODELSLAB_API_KEY or pass apiKey.");
            return false;
        }
        return true;
    }
    /**
     * @returns {string[]} Names of the credential fields this client needs.
     */
    getRequiredCredentials() {
        return ["apiKey"];
    }
    /**
     * @returns {Promise<object[]>} The static, module-level voice list.
     */
    async _getVoices() {
        return MODELSLAB_VOICES;
    }
    /**
     * Synthesize text to audio bytes (Uint8Array).
     * Handles async generation — polls until audio is ready.
     *
     * @param {string} text - Text to synthesize.
     * @param {object} [options] - Passed through to {@link synthToBytestream}.
     * @returns {Promise<Uint8Array>} All stream chunks concatenated in order.
     */
    async synthToBytes(text, options = {}) {
        const { audioStream } = await this.synthToBytestream(text, options);
        const reader = audioStream.getReader();
        const chunks = [];
        while (true) {
            const { done, value } = await reader.read();
            if (done)
                break;
            chunks.push(value);
        }
        // Allocate once at the final size, then copy each chunk into place.
        const totalLen = chunks.reduce((n, c) => n + c.length, 0);
        const out = new Uint8Array(totalLen);
        let offset = 0;
        for (const chunk of chunks) {
            out.set(chunk, offset);
            offset += chunk.length;
        }
        return out;
    }
    /**
     * Synthesize text to a ReadableStream of audio chunks.
     *
     * The full audio is fetched first and then emitted as a single chunk.
     * The resolved voice is stored on the instance, so a per-call
     * `options.voice` also becomes the voice for subsequent calls.
     *
     * @param {string} text - Plain text, SSML, or (with
     *   `options.useSpeechMarkdown`) Speech Markdown. Markup is stripped
     *   before the text is sent.
     * @param {object} [options] - Reads `useSpeechMarkdown`, `voice`, `speed`,
     *   `language` and `emotion` (defaults to `false`).
     * @returns {Promise<{audioStream: ReadableStream, wordBoundaries: Array}>}
     *   `wordBoundaries` is always an empty array.
     */
    async synthToBytestream(text, options = {}) {
        let processedText = text;
        // Convert SpeechMarkdown → SSML → plain text if needed
        if (options.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) {
            const ssml = await SpeechMarkdown.toSSML(processedText);
            processedText = SSMLUtils.stripSSML(ssml);
        }
        else if (SSMLUtils.isSSML(processedText)) {
            // ModelsLab doesn't support SSML — strip tags
            processedText = SSMLUtils.stripSSML(processedText);
        }
        const voiceId = options.voice || this.voiceId || DEFAULT_VOICE;
        this.voiceId = voiceId;
        const speed = options.speed ?? this.defaultSpeed;
        const language = options.language ?? this.defaultLanguage;
        const audioBytes = await this._synthesize(processedText, voiceId, language, speed, options.emotion ?? false);
        const audioStream = new ReadableStream({
            start(controller) {
                controller.enqueue(audioBytes);
                controller.close();
            },
        });
        return { audioStream, wordBoundaries: [] };
    }
    /**
     * Internal: call ModelsLab API and return audio bytes.
     *
     * @param {string} text - Text sent as the request's `prompt`.
     * @param {string} voiceId - Sent as `voice_id`.
     * @param {string} language - Sent as `language`.
     * @param {number} speed - Sent as `speed`.
     * @param {boolean} emotion - Sent as `emotion`.
     * @returns {Promise<Uint8Array>} The downloaded audio file contents.
     * @throws {Error} On a non-OK HTTP response, an "error" status, an
     *   unrecognized status, a "processing" response without a poll URL, or
     *   a result that carries no audio URL.
     */
    async _synthesize(text, voiceId, language, speed, emotion) {
        const fetch = (0, fetch_utils_1.getFetch)();
        const resp = await fetch(API_URL, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
                key: this.apiKey,
                prompt: text,
                language,
                voice_id: voiceId,
                speed,
                emotion,
            }),
        });
        if (!resp.ok) {
            throw new Error(`ModelsLab API error: ${resp.status} ${resp.statusText}`);
        }
        const data = (await resp.json());
        if (data.status === "error") {
            throw new Error(`ModelsLab TTS error: ${data.message ?? JSON.stringify(data)}`);
        }
        let audioUrl;
        if (data.status === "success") {
            // An empty/missing `output` leaves audioUrl undefined and is
            // reported by the "no audio URL" check below, rather than as an
            // "unexpected status".
            audioUrl = data.output?.[0];
        }
        else if (data.status === "processing") {
            const fetchUrl = data.fetch_result ?? data.link;
            if (!fetchUrl) {
                throw new Error("ModelsLab returned processing status with no fetch URL");
            }
            audioUrl = await this._poll(fetchUrl, fetch);
        }
        else {
            throw new Error(`Unexpected ModelsLab status: ${data.status}`);
        }
        if (!audioUrl) {
            throw new Error("ModelsLab returned no audio URL");
        }
        return this._downloadAudio(audioUrl, fetch);
    }
    /**
     * Poll the fetch_result URL until audio is ready.
     *
     * Sleeps POLL_INTERVAL_MS before every request and makes at most
     * MAX_POLL_ATTEMPTS requests. Non-OK HTTP responses are retried rather
     * than treated as fatal, but the most recent one is reported if the
     * attempts run out.
     *
     * @param {string} fetchUrl - URL to POST the API key to.
     * @param {Function} fetch - fetch implementation to use.
     * @returns {Promise<string>} First entry of the successful `output` list.
     * @throws {Error} When the API reports an "error" status or every
     *   attempt is exhausted.
     */
    async _poll(fetchUrl, fetch) {
        let lastHttpFailure;
        for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt++) {
            await this._sleep(POLL_INTERVAL_MS);
            const resp = await fetch(fetchUrl, {
                method: "POST",
                headers: { "Content-Type": "application/json" },
                body: JSON.stringify({ key: this.apiKey }),
            });
            if (!resp.ok) {
                // Keep retrying, but remember the failure so a persistent
                // HTTP error is not disguised as a plain timeout.
                lastHttpFailure = `${resp.status} ${resp.statusText}`;
                continue;
            }
            const data = (await resp.json());
            if (data.status === "success" && data.output?.length) {
                return data.output[0];
            }
            if (data.status === "error") {
                // Same fallback as _synthesize: never print "undefined".
                throw new Error(`ModelsLab poll error: ${data.message ?? JSON.stringify(data)}`);
            }
        }
        const detail = lastHttpFailure ? ` (last HTTP error: ${lastHttpFailure})` : "";
        throw new Error(`ModelsLab audio generation timed out after ${MAX_POLL_ATTEMPTS} attempts${detail}`);
    }
    /**
     * Download audio from URL and return as Uint8Array.
     *
     * @param {string} url - Location of the generated audio file.
     * @param {Function} fetch - fetch implementation to use.
     * @returns {Promise<Uint8Array>} Raw response body bytes.
     * @throws {Error} When the response status is not OK.
     */
    async _downloadAudio(url, fetch) {
        const resp = await fetch(url);
        if (!resp.ok) {
            throw new Error(`Failed to download audio: ${resp.status} ${resp.statusText}`);
        }
        const buf = await resp.arrayBuffer();
        return new Uint8Array(buf);
    }
    /**
     * @param {number} ms - Delay in milliseconds.
     * @returns {Promise<void>} Resolves after the delay via setTimeout.
     */
    _sleep(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }
}
exports.ModelsLabTTSClient = ModelsLabTTSClient;
exports.default = ModelsLabTTSClient;