UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

294 lines (293 loc) 11.5 kB
import { AbstractTTSClient } from "../core/abstract-tts.js";
import * as SSMLUtils from "../core/ssml-utils.js";
import * as SpeechMarkdown from "../markdown/converter.js";
import { base64ToUint8Array } from "../utils/base64-utils.js";
import { getFetch } from "../utils/fetch-utils.js";
import { toIso639_3, toLanguageDisplay } from "../utils/language-utils.js";

const fetch = getFetch();

/**
 * Read the API key from the environment without assuming a Node-style
 * `process` global exists (it does not in browsers, where an unguarded
 * `process.env` access throws a ReferenceError).
 *
 * @returns {string | undefined} The value of MISTRAL_API_KEY, if available.
 */
function readEnvApiKey() {
    if (typeof process === "undefined" || !process.env) {
        return undefined;
    }
    return process.env.MISTRAL_API_KEY;
}

/**
 * Build the JSON payload for the `/audio/speech` endpoint.
 *
 * `providerOptions` is spread after `response_format` and before `model` /
 * `input`, so it can override the response format but not the model or text.
 *
 * @param {MistralTTSClient} client - Source of the default model, voice and format.
 * @param {string} preparedText - Text to send as `input`.
 * @param {object} options - Per-call options (`model`, `voice`, `responseFormat`, `providerOptions`).
 * @param {boolean} stream - When true, `stream: true` is added to the payload.
 * @returns {object} The request body, ready for JSON.stringify.
 */
function buildSpeechBody(client, preparedText, options, stream) {
    const body = {
        response_format: options.responseFormat || client.responseFormat,
        ...options.providerOptions,
        model: options.model || client.model,
        input: preparedText,
    };
    if (stream) {
        body.stream = true;
    }
    const voiceId = options.voice || client.voiceId;
    if (voiceId) {
        body.voice_id = voiceId;
    }
    return body;
}

/**
 * POST a payload to `${baseUrl}/audio/speech` and fail loudly on non-2xx.
 *
 * @param {MistralTTSClient} client - Source of `baseUrl` and `apiKey`.
 * @param {object} body - Payload produced by buildSpeechBody.
 * @param {object} [extraHeaders] - Additional request headers.
 * @returns {Promise<Response>} The successful response.
 * @throws {Error} When the response status is not OK; the message carries
 *   the status, status text and response body.
 */
async function postSpeech(client, body, extraHeaders = {}) {
    const response = await fetch(`${client.baseUrl}/audio/speech`, {
        method: "POST",
        headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${client.apiKey}`,
            ...extraHeaders,
        },
        body: JSON.stringify(body),
    });
    if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Mistral API error: ${response.status} ${response.statusText} - ${errorText}`);
    }
    return response;
}

/**
 * Text-to-speech client that talks to the Mistral `/audio/speech` HTTP API.
 */
export class MistralTTSClient extends AbstractTTSClient {
    /**
     * @param {object} [credentials]
     * @param {string} [credentials.apiKey] - Falls back to the MISTRAL_API_KEY
     *   environment variable, then to an empty string.
     * @param {string} [credentials.baseURL] - Defaults to "https://api.mistral.ai/v1".
     * @param {string} [credentials.model] - Defaults to "voxtral-mini-tts-2603".
     * @param {object|string} [credentials.properties] - Extra properties (object
     *   or JSON string); `propertiesJson` / `propertiesJSON` are accepted aliases.
     */
    constructor(credentials = {}) {
        super(credentials);
        Object.defineProperty(this, "apiKey", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "baseUrl", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "model", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "responseFormat", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.apiKey = credentials.apiKey || readEnvApiKey() || "";
        this.baseUrl = credentials.baseURL || "https://api.mistral.ai/v1";
        this.model = credentials.model || "voxtral-mini-tts-2603";
        this.voiceId = "";
        this.responseFormat = "mp3";
        this._models = [
            {
                id: "voxtral-mini-tts-2603",
                features: ["streaming", "inline-voice-cloning", "open-source"],
            },
        ];
        this.sampleRate = 24000;
        this.applyCredentialProperties(credentials);
    }

    /**
     * Apply every key/value pair found in `credentials.properties` (or its
     * `propertiesJson` / `propertiesJSON` aliases) through setProperty.
     * A string value is parsed as JSON; a string that is not valid JSON is
     * ignored on purpose so bad optional configuration cannot break construction.
     *
     * @param {object} credentials
     */
    applyCredentialProperties(credentials) {
        const rawProps = credentials.properties ??
            credentials.propertiesJson ??
            credentials.propertiesJSON;
        if (!rawProps) {
            return;
        }
        let parsed = null;
        if (typeof rawProps === "string") {
            try {
                parsed = JSON.parse(rawProps);
            }
            catch {
                /* best effort: unparseable properties are ignored */
            }
        }
        else if (typeof rawProps === "object") {
            parsed = rawProps;
        }
        if (!parsed) {
            return;
        }
        for (const [key, value] of Object.entries(parsed)) {
            this.setProperty(key, value);
        }
    }

    /**
     * Reduce the input to plain text: Speech Markdown (when enabled via
     * `options.useSpeechMarkdown` and detected) is converted to SSML, and any
     * SSML is then stripped.
     *
     * @param {string} text
     * @param {object} [options]
     * @returns {Promise<string>} The text that is sent to the API.
     */
    async prepareText(text, options) {
        let processedText = text;
        if (options?.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) {
            const ssml = await SpeechMarkdown.toSSML(processedText, "w3c");
            processedText = SSMLUtils.stripSSML(ssml);
        }
        if (SSMLUtils.isSSML(processedText)) {
            processedText = SSMLUtils.stripSSML(processedText);
        }
        return processedText;
    }

    /** @param {string} model - Model id used for subsequent requests. */
    setModel(model) {
        this.model = model;
    }

    /** @param {string} voiceId - Voice id used for subsequent requests. */
    setVoice(voiceId) {
        this.voiceId = voiceId;
    }

    /**
     * Read "model", "voice" or "responseFormat"; any other name is delegated
     * to the superclass.
     *
     * @param {string} property
     * @returns {*} The current value of the property.
     */
    getProperty(property) {
        switch (property) {
            case "model":
                return this.model;
            case "voice":
                return this.voiceId;
            case "responseFormat":
                return this.responseFormat;
            default:
                return super.getProperty(property);
        }
    }

    /**
     * Write "model", "voice" or "responseFormat"; any other name is delegated
     * to the superclass.
     *
     * @param {string} property
     * @param {*} value
     */
    setProperty(property, value) {
        switch (property) {
            case "model":
                this.setModel(value);
                break;
            case "voice":
                this.setVoice(value);
                break;
            case "responseFormat":
                this.responseFormat = value;
                break;
            default:
                super.setProperty(property, value);
                break;
        }
    }

    /**
     * Check the API key by issuing `GET ${baseUrl}/models`.
     *
     * @returns {Promise<boolean>} True when the request returns an OK status;
     *   false when there is no key, the status is not OK, or the request throws.
     */
    async checkCredentials() {
        if (!this.apiKey) {
            return false;
        }
        try {
            const response = await fetch(`${this.baseUrl}/models`, {
                method: "GET",
                headers: {
                    Authorization: `Bearer ${this.apiKey}`,
                },
            });
            return response.ok;
        }
        catch {
            return false;
        }
    }

    /** @returns {string[]} Names of the credential fields this client requires. */
    getRequiredCredentials() {
        return ["apiKey"];
    }

    /** @returns {Promise<object[]>} The static voice list (MistralTTSClient.VOICES). */
    async _getVoices() {
        return MistralTTSClient.VOICES;
    }

    /**
     * Convert raw voice records into the unified voice shape.
     * A voice without a language is reported as "en-US".
     *
     * @param {object[]} rawVoices - Records with `id`, `name`, `gender`, `language`.
     * @returns {Promise<object[]>}
     */
    async _mapVoicesToUnified(rawVoices) {
        return rawVoices.map((voice) => {
            const language = voice.language || "en-US";
            return {
                id: voice.id,
                name: voice.name,
                gender: voice.gender,
                languageCodes: [
                    {
                        bcp47: language,
                        iso639_3: toIso639_3(language),
                        display: toLanguageDisplay(language),
                    },
                ],
                provider: "mistral",
            };
        });
    }

    /**
     * Synthesize text in a single (non-streaming) request.
     *
     * @param {string} text
     * @param {object} [options] - `model`, `voice`, `responseFormat`,
     *   `providerOptions`, `useSpeechMarkdown`.
     * @returns {Promise<Uint8Array>} Audio decoded from the base64 `audio_data`
     *   field of the JSON response.
     * @throws {Error} On a non-OK HTTP status, or when the response carries no
     *   `audio_data` string.
     */
    async synthToBytes(text, options = {}) {
        const preparedText = await this.prepareText(text, options);
        const body = buildSpeechBody(this, preparedText, options, false);
        const response = await postSpeech(this, body);
        const json = (await response.json());
        if (typeof json?.audio_data !== "string") {
            // Fail with a clear message instead of handing undefined to the decoder.
            throw new Error("Mistral API error: response did not contain audio_data");
        }
        this._createEstimatedWordTimings(preparedText);
        return base64ToUint8Array(json.audio_data);
    }

    /**
     * Synthesize text with `stream: true` and expose the audio as a stream.
     *
     * If the response has no readable body, this falls back to synthToBytes
     * (a second request) and wraps the bytes in a one-chunk stream.
     *
     * @param {string} text
     * @param {object} [options] - Same options as synthToBytes.
     * @returns {Promise<{audioStream: ReadableStream, wordBoundaries: Array}>}
     *   `wordBoundaries` is always an empty array.
     * @throws {Error} On a non-OK HTTP status.
     */
    async synthToBytestream(text, options = {}) {
        const preparedText = await this.prepareText(text, options);
        const body = buildSpeechBody(this, preparedText, options, true);
        const response = await postSpeech(this, body, { Accept: "text/event-stream" });
        if (!response.body) {
            const bytes = await this.synthToBytes(text, options);
            const readableStream = new ReadableStream({
                start(controller) {
                    controller.enqueue(bytes);
                    controller.close();
                },
            });
            return { audioStream: readableStream, wordBoundaries: [] };
        }
        const sseStream = this.parseSseBase64Stream(response.body);
        return { audioStream: sseStream, wordBoundaries: [] };
    }

    /**
     * Turn a server-sent-events byte stream into a stream of audio chunks.
     *
     * Only `data:` lines whose JSON payload has `type === "speech.audio.delta"`
     * and a string `audio_data` produce output; everything else (including
     * `[DONE]` and malformed payloads) is skipped.
     *
     * @param {ReadableStream<Uint8Array>} body - The raw response body.
     * @returns {ReadableStream<Uint8Array>} Decoded audio chunks.
     */
    parseSseBase64Stream(body) {
        const reader = body.getReader();
        const decoder = new TextDecoder();
        let buffer = "";

        // Enqueue the audio carried by one SSE line; returns true if a chunk was emitted.
        const emitAudioFromLine = (line, controller) => {
            // The space after "data:" is optional in the SSE format; trim() removes it.
            if (!line.startsWith("data:")) {
                return false;
            }
            const data = line.slice(5).trim();
            if (!data || data === "[DONE]") {
                return false;
            }
            try {
                const json = JSON.parse(data);
                if (json.type === "speech.audio.delta" && typeof json.audio_data === "string") {
                    controller.enqueue(base64ToUint8Array(json.audio_data));
                    return true;
                }
            }
            catch {
                /* skip malformed */
            }
            return false;
        };

        return new ReadableStream({
            async pull(controller) {
                // Keep reading until at least one chunk is enqueued (or the source
                // ends): resolving pull() with nothing enqueued could stall a
                // pending read, while returning after a chunk lets the consumer
                // apply backpressure.
                while (true) {
                    const { done, value } = await reader.read();
                    if (done) {
                        // Flush bytes held by the decoder and a final line that
                        // was not terminated by a newline.
                        buffer += decoder.decode();
                        if (buffer) {
                            emitAudioFromLine(buffer, controller);
                            buffer = "";
                        }
                        controller.close();
                        return;
                    }
                    buffer += decoder.decode(value, { stream: true });
                    const lines = buffer.split("\n");
                    buffer = lines.pop() || "";
                    let emitted = false;
                    for (const line of lines) {
                        if (emitAudioFromLine(line, controller)) {
                            emitted = true;
                        }
                    }
                    if (emitted) {
                        return;
                    }
                }
            },
            cancel(reason) {
                // Release the underlying response body when the consumer stops early.
                return reader.cancel(reason);
            },
        });
    }
}

// Static catalogue of voice ids returned by _getVoices. Every entry uses the
// id as its display name, with gender "Unknown" and language "en-US".
Object.defineProperty(MistralTTSClient, "VOICES", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: [
        "Amalthea",
        "Achan",
        "Brave",
        "Contessa",
        "Daintree",
        "Eugora",
        "Fornax",
        "Griffin",
        "Hestia",
        "Irving",
        "Jasmine",
        "Kestra",
        "Lorentz",
        "Mara",
        "Nettle",
        "Orin",
        "Puck",
        "Quinn",
        "Rune",
        "Simbe",
        "Tertia",
        "Umbriel",
        "Vesta",
        "Wystan",
        "Xeno",
        "Yara",
        "Zephyr",
    ].map((name) => ({ id: name, name, gender: "Unknown", language: "en-US" }))
});