js-tts-wrapper
Version:
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
331 lines (330 loc) • 13.1 kB
JavaScript
"use strict";
/**
 * Interop helper: exposes property `k` of module `m` on object `o` under the
 * name `k2` (defaulting to `k`). An already-installed helper on `this` is
 * reused when present.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    // Descriptor-based strategy: re-export through a getter unless the source
    // property can be forwarded as-is.
    function bindWithDescriptor(o, m, k, k2) {
        var exportedName = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor properties are only forwarded untouched for __esModule sources.
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, exportedName, descriptor);
    }
    // Plain-assignment strategy used when Object.create is unavailable.
    function bindByAssignment(o, m, k, k2) {
        var exportedName = k2 === undefined ? k : k2;
        o[exportedName] = m[k];
    }
    return Object.create ? bindWithDescriptor : bindByAssignment;
})();
/**
 * Interop helper: attaches value `v` to object `o` as its `default` property.
 * An already-installed helper on `this` is reused when present.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    // Defines an enumerable `default` data property via a descriptor.
    function defineDefault(o, v) {
        Object.defineProperty(o, "default", { enumerable: true, value: v });
    }
    // Plain-assignment fallback used when Object.create is unavailable.
    function assignDefault(o, v) {
        o["default"] = v;
    }
    if (Object.create) {
        return defineDefault;
    }
    return assignDefault;
})();
/**
 * Interop helper: returns `mod` unchanged when it is flagged `__esModule`;
 * otherwise builds a fresh namespace object that re-exposes every own key of
 * `mod` except "default" (via __createBinding) and sets `default` to `mod`
 * itself (via __setModuleDefault).
 */
var __importStar = (this && this.__importStar) || (function () {
    var hasOwn = Object.prototype.hasOwnProperty;
    // Resolved on first use, then reused for every later call.
    var resolvedLister = null;
    // for-in based fallback when Object.getOwnPropertyNames is unavailable.
    function enumerateOwnKeys(o) {
        var names = [];
        for (var key in o) {
            if (hasOwn.call(o, key)) {
                names[names.length] = key;
            }
        }
        return names;
    }
    function listOwnKeys(o) {
        if (resolvedLister === null) {
            resolvedLister = Object.getOwnPropertyNames || enumerateOwnKeys;
        }
        return resolvedLister(o);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            var index = 0;
            while (index < names.length) {
                var exportName = names[index];
                if (exportName !== "default") {
                    __createBinding(namespace, mod, exportName);
                }
                index += 1;
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
// Flag the exports object with `__esModule`; the interop helper above returns
// such modules unchanged instead of wrapping them.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder for the export; the class itself is assigned once it has been defined below.
exports.MistralTTSClient = void 0;
const abstract_tts_1 = require("../core/abstract-tts");
const SSMLUtils = __importStar(require("../core/ssml-utils"));
const SpeechMarkdown = __importStar(require("../markdown/converter"));
const base64_utils_1 = require("../utils/base64-utils");
const fetch_utils_1 = require("../utils/fetch-utils");
const language_utils_1 = require("../utils/language-utils");
const fetch = (0, fetch_utils_1.getFetch)();
/**
 * Text-to-speech client that talks to a Mistral-style HTTP API
 * (`GET {baseUrl}/models`, `POST {baseUrl}/audio/speech`).
 *
 * Input text is reduced to plain text before being sent: Speech Markdown is
 * converted to SSML and any SSML markup is stripped.
 */
class MistralTTSClient extends abstract_tts_1.AbstractTTSClient {
    /**
     * @param {object} [credentials] - Client configuration.
     * @param {string} [credentials.apiKey] - API key. Falls back to the
     *   `MISTRAL_API_KEY` environment variable when a `process.env` exists,
     *   then to an empty string.
     * @param {string} [credentials.baseURL] - API root; defaults to
     *   "https://api.mistral.ai/v1". Trailing slashes are removed.
     * @param {string} [credentials.model] - Model id; defaults to
     *   "voxtral-mini-tts-2603".
     * @param {object|string} [credentials.properties] - Extra properties (object
     *   or JSON string) applied through `setProperty`. `propertiesJson` and
     *   `propertiesJSON` are accepted as aliases.
     */
    constructor(credentials = {}) {
        super(credentials);
        Object.defineProperty(this, "apiKey", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "baseUrl", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "model", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "responseFormat", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // `process` is not defined in browsers and some edge runtimes; touching it
        // unguarded would throw a ReferenceError before the client is usable.
        const envApiKey = typeof process !== "undefined" && process.env
            ? process.env.MISTRAL_API_KEY
            : undefined;
        this.apiKey = credentials.apiKey || envApiKey || "";
        // Endpoints are built as `${baseUrl}/path`, so drop trailing slashes to
        // avoid emitting `//path`.
        this.baseUrl = String(credentials.baseURL || "https://api.mistral.ai/v1").replace(/\/+$/, "");
        this.model = credentials.model || "voxtral-mini-tts-2603";
        this.voiceId = "";
        this.responseFormat = "mp3";
        this._models = [
            {
                id: "voxtral-mini-tts-2603",
                features: ["streaming", "inline-voice-cloning", "open-source"],
            },
        ];
        this.sampleRate = 24000;
        this.applyCredentialProperties(credentials);
    }
    /**
     * Applies optional extra properties carried on the credentials object.
     * Accepts a key/value object or a JSON string encoding one; invalid JSON
     * and non-object values (including arrays) are ignored on purpose so a bad
     * optional setting never prevents construction.
     *
     * @param {object} credentials - Credentials passed to the constructor.
     */
    applyCredentialProperties(credentials) {
        const rawProps = credentials.properties ??
            credentials.propertiesJson ??
            credentials.propertiesJSON;
        if (!rawProps) {
            return;
        }
        let parsed = null;
        if (typeof rawProps === "string") {
            try {
                parsed = JSON.parse(rawProps);
            }
            catch {
                /* best effort: malformed JSON is ignored */
            }
        }
        else if (typeof rawProps === "object") {
            parsed = rawProps;
        }
        // Only key/value maps make sense here; an array would otherwise be
        // applied as properties named "0", "1", ...
        if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
            return;
        }
        for (const [key, value] of Object.entries(parsed)) {
            this.setProperty(key, value);
        }
    }
    /**
     * Reduces input to plain text: Speech Markdown (when enabled and detected)
     * is converted to SSML, and SSML markup is stripped.
     *
     * @param {string} text - Raw input text.
     * @param {object} [options] - Synthesis options; `useSpeechMarkdown` is read.
     * @returns {Promise<string>} Text with markup removed.
     */
    async prepareText(text, options) {
        let processedText = text;
        if (options?.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) {
            const ssml = await SpeechMarkdown.toSSML(processedText, "w3c");
            processedText = SSMLUtils.stripSSML(ssml);
        }
        if (SSMLUtils.isSSML(processedText)) {
            processedText = SSMLUtils.stripSSML(processedText);
        }
        return processedText;
    }
    /**
     * @param {string} model - Model id used for subsequent requests.
     */
    setModel(model) {
        this.model = model;
    }
    /**
     * @param {string} voiceId - Voice id used for subsequent requests.
     */
    setVoice(voiceId) {
        this.voiceId = voiceId;
    }
    /**
     * Reads "model", "voice" or "responseFormat" from this client; any other
     * name is delegated to the base class.
     *
     * @param {string} property - Property name.
     * @returns {*} Current value of the property.
     */
    getProperty(property) {
        switch (property) {
            case "model":
                return this.model;
            case "voice":
                return this.voiceId;
            case "responseFormat":
                return this.responseFormat;
            default:
                return super.getProperty(property);
        }
    }
    /**
     * Writes "model", "voice" or "responseFormat" on this client; any other
     * name is delegated to the base class.
     *
     * @param {string} property - Property name.
     * @param {*} value - New value.
     */
    setProperty(property, value) {
        switch (property) {
            case "model":
                this.setModel(value);
                break;
            case "voice":
                this.setVoice(value);
                break;
            case "responseFormat":
                this.responseFormat = value;
                break;
            default:
                super.setProperty(property, value);
                break;
        }
    }
    /**
     * Checks the API key by requesting `{baseUrl}/models`.
     *
     * @returns {Promise<boolean>} `true` when the request succeeds with an OK
     *   status; `false` when no key is configured, the status is not OK, or the
     *   request itself fails.
     */
    async checkCredentials() {
        if (!this.apiKey)
            return false;
        try {
            const response = await fetch(`${this.baseUrl}/models`, {
                method: "GET",
                headers: {
                    Authorization: `Bearer ${this.apiKey}`,
                },
            });
            return response.ok;
        }
        catch {
            return false;
        }
    }
    /**
     * @returns {string[]} Names of the credential fields this client requires.
     */
    getRequiredCredentials() {
        return ["apiKey"];
    }
    /**
     * @returns {Promise<object[]>} The static `MistralTTSClient.VOICES` table.
     */
    async _getVoices() {
        return MistralTTSClient.VOICES;
    }
    /**
     * Maps raw voice records to the unified voice shape. A missing `language`
     * is treated as "en-US".
     *
     * @param {object[]} rawVoices - Records with `id`, `name`, `gender`, `language`.
     * @returns {Promise<object[]>} Unified voice descriptors with provider "mistral".
     */
    async _mapVoicesToUnified(rawVoices) {
        return rawVoices.map((voice) => {
            const language = voice.language || "en-US";
            return {
                id: voice.id,
                name: voice.name,
                gender: voice.gender,
                languageCodes: [
                    {
                        bcp47: language,
                        iso639_3: (0, language_utils_1.toIso639_3)(language),
                        display: (0, language_utils_1.toLanguageDisplay)(language),
                    },
                ],
                provider: "mistral",
            };
        });
    }
    /**
     * Builds the JSON payload for `/audio/speech`.
     *
     * Precedence: `providerOptions` may override `response_format`, but never
     * `model`, `input`, or anything in `extraFields`. `voice_id` is only set
     * from `options.voice` / the client voice when one is configured.
     *
     * @param {string} preparedText - Plain text to synthesize.
     * @param {object} options - Per-call synthesis options.
     * @param {object} [extraFields] - Fields forced onto the payload (e.g. `stream`).
     * @returns {object} Request payload.
     */
    buildMistralSpeechBody(preparedText, options, extraFields = {}) {
        const body = {
            response_format: options.responseFormat || this.responseFormat,
            ...options.providerOptions,
            model: options.model || this.model,
            input: preparedText,
            ...extraFields,
        };
        const voiceId = options.voice || this.voiceId;
        if (voiceId) {
            body.voice_id = voiceId;
        }
        return body;
    }
    /**
     * POSTs a payload to `{baseUrl}/audio/speech`.
     *
     * @param {object} body - Request payload.
     * @param {object} [extraHeaders] - Headers added to the defaults.
     * @returns {Promise<Response>} The response, guaranteed to have an OK status.
     * @throws {Error} When the response status is not OK; the message includes
     *   the status and the response body text.
     */
    async postMistralSpeech(body, extraHeaders = {}) {
        const response = await fetch(`${this.baseUrl}/audio/speech`, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${this.apiKey}`,
                ...extraHeaders,
            },
            body: JSON.stringify(body),
        });
        if (!response.ok) {
            const errorText = await response.text();
            throw new Error(`Mistral API error: ${response.status} ${response.statusText} - ${errorText}`);
        }
        return response;
    }
    /**
     * Synthesizes text in a single request and returns the decoded audio.
     *
     * @param {string} text - Input text (plain, SSML or Speech Markdown).
     * @param {object} [options] - `model`, `voice`, `responseFormat`,
     *   `providerOptions` and `useSpeechMarkdown` are read.
     * @returns {Promise<Uint8Array>} Audio bytes decoded from the response's
     *   base64 `audio_data` field.
     * @throws {Error} On a non-OK response or when `audio_data` is missing.
     */
    async synthToBytes(text, options = {}) {
        const preparedText = await this.prepareText(text, options);
        const body = this.buildMistralSpeechBody(preparedText, options);
        const response = await this.postMistralSpeech(body);
        const json = (await response.json());
        this._createEstimatedWordTimings(preparedText);
        // Fail with a clear message instead of handing a non-string to the
        // base64 decoder.
        if (!json || typeof json.audio_data !== "string") {
            throw new Error("Mistral API error: response did not contain audio_data");
        }
        return (0, base64_utils_1.base64ToUint8Array)(json.audio_data);
    }
    /**
     * Synthesizes text with `stream: true` and exposes the audio as a stream
     * of decoded chunks.
     *
     * @param {string} text - Input text (plain, SSML or Speech Markdown).
     * @param {object} [options] - Same options as `synthToBytes`.
     * @returns {Promise<{audioStream: ReadableStream, wordBoundaries: Array}>}
     *   The audio stream and an (always empty) word boundary list.
     * @throws {Error} On a non-OK response.
     */
    async synthToBytestream(text, options = {}) {
        const preparedText = await this.prepareText(text, options);
        const body = this.buildMistralSpeechBody(preparedText, options, { stream: true });
        const response = await this.postMistralSpeech(body, { Accept: "text/event-stream" });
        if (!response.body) {
            // No readable body on this response: fall back to a non-streaming
            // request and wrap its bytes in a single-chunk stream.
            const bytes = await this.synthToBytes(text, options);
            const readableStream = new ReadableStream({
                start(controller) {
                    controller.enqueue(bytes);
                    controller.close();
                },
            });
            return { audioStream: readableStream, wordBoundaries: [] };
        }
        const sseStream = this.parseSseBase64Stream(response.body);
        return { audioStream: sseStream, wordBoundaries: [] };
    }
    /**
     * Converts a server-sent-events byte stream into a stream of audio chunks.
     * Each `data:` line is parsed as JSON; events of type "speech.audio.delta"
     * with a string `audio_data` are base64-decoded and enqueued. Other lines,
     * "[DONE]" markers and malformed payloads are skipped.
     *
     * @param {ReadableStream} body - Response body carrying the SSE text.
     * @returns {ReadableStream} Stream of decoded audio chunks.
     */
    parseSseBase64Stream(body) {
        const reader = body.getReader();
        const decoder = new TextDecoder();
        let buffer = "";
        // Handles one SSE line; returns true when an audio chunk was enqueued.
        const emitLine = (line, controller) => {
            // The space after "data:" is optional in SSE, so match the bare field
            // name and let trim() remove the optional space and any trailing "\r".
            if (!line.startsWith("data:")) {
                return false;
            }
            const data = line.slice(5).trim();
            if (!data || data === "[DONE]") {
                return false;
            }
            try {
                const json = JSON.parse(data);
                if (json.type === "speech.audio.delta" && typeof json.audio_data === "string") {
                    controller.enqueue((0, base64_utils_1.base64ToUint8Array)(json.audio_data));
                    return true;
                }
            }
            catch {
                /* skip malformed */
            }
            return false;
        };
        return new ReadableStream({
            async pull(controller) {
                // Read only until at least one chunk has been produced so the
                // consumer's backpressure is respected. Resolving without
                // enqueueing or closing could stall the stream, hence the loop.
                let emitted = false;
                while (!emitted) {
                    const { done, value } = await reader.read();
                    if (done) {
                        // Flush bytes held by the decoder and process a final
                        // line that was not terminated by a newline.
                        buffer += decoder.decode();
                        const tail = buffer;
                        buffer = "";
                        for (const line of tail.split("\n")) {
                            emitLine(line, controller);
                        }
                        controller.close();
                        return;
                    }
                    buffer += decoder.decode(value, { stream: true });
                    const lines = buffer.split("\n");
                    // The last element is an incomplete line; keep it for the next read.
                    buffer = lines.pop() || "";
                    for (const line of lines) {
                        if (emitLine(line, controller)) {
                            emitted = true;
                        }
                    }
                }
            },
            cancel(reason) {
                // Propagate cancellation so the underlying response is released.
                return reader.cancel(reason);
            },
        });
    }
}
exports.MistralTTSClient = MistralTTSClient;
// Static voice catalogue returned by `_getVoices`. Every entry uses its name
// as its id, with gender "Unknown" and language "en-US".
Object.defineProperty(MistralTTSClient, "VOICES", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: [
        "Amalthea",
        "Achan",
        "Brave",
        "Contessa",
        "Daintree",
        "Eugora",
        "Fornax",
        "Griffin",
        "Hestia",
        "Irving",
        "Jasmine",
        "Kestra",
        "Lorentz",
        "Mara",
        "Nettle",
        "Orin",
        "Puck",
        "Quinn",
        "Rune",
        "Simbe",
        "Tertia",
        "Umbriel",
        "Vesta",
        "Wystan",
        "Xeno",
        "Yara",
        "Zephyr",
    ].map((voiceName) => ({
        id: voiceName,
        name: voiceName,
        gender: "Unknown",
        language: "en-US",
    })),
});