UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

331 lines (330 loc) 13.1 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.MistralTTSClient = void 0; const abstract_tts_1 = require("../core/abstract-tts"); const SSMLUtils = __importStar(require("../core/ssml-utils")); const SpeechMarkdown = __importStar(require("../markdown/converter")); const base64_utils_1 = require("../utils/base64-utils"); const fetch_utils_1 = require("../utils/fetch-utils"); const language_utils_1 = require("../utils/language-utils"); const fetch = (0, fetch_utils_1.getFetch)(); class MistralTTSClient extends abstract_tts_1.AbstractTTSClient { constructor(credentials = {}) { super(credentials); Object.defineProperty(this, "apiKey", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "baseUrl", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "model", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "responseFormat", { enumerable: true, configurable: true, writable: true, value: void 0 }); this.apiKey = credentials.apiKey || process.env.MISTRAL_API_KEY || ""; this.baseUrl = credentials.baseURL || "https://api.mistral.ai/v1"; this.model = credentials.model || "voxtral-mini-tts-2603"; this.voiceId = ""; this.responseFormat = "mp3"; this._models = [ { id: "voxtral-mini-tts-2603", features: ["streaming", "inline-voice-cloning", "open-source"], }, ]; this.sampleRate = 24000; this.applyCredentialProperties(credentials); } applyCredentialProperties(credentials) { const rawProps = credentials.properties ?? credentials.propertiesJson ?? credentials.propertiesJSON; if (rawProps) { let parsed = null; if (typeof rawProps === "string") { try { parsed = JSON.parse(rawProps); } catch { /* ignore */ } } else if (typeof rawProps === "object") { parsed = rawProps; } if (parsed) { for (const [key, value] of Object.entries(parsed)) { this.setProperty(key, value); } } } } async prepareText(text, options) { let processedText = text; if (options?.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) { const ssml = await SpeechMarkdown.toSSML(processedText, "w3c"); processedText = SSMLUtils.stripSSML(ssml); } if (SSMLUtils.isSSML(processedText)) { processedText = SSMLUtils.stripSSML(processedText); } return processedText; } setModel(model) { this.model = model; } setVoice(voiceId) { this.voiceId = voiceId; } getProperty(property) { switch (property) { case "model": return this.model; case "voice": return this.voiceId; case "responseFormat": return this.responseFormat; default: return super.getProperty(property); } } setProperty(property, value) { switch (property) { case "model": this.setModel(value); break; case "voice": this.setVoice(value); break; case "responseFormat": this.responseFormat = value; break; default: super.setProperty(property, value); break; } } async checkCredentials() { if (!this.apiKey) return false; try { const response = await fetch(`${this.baseUrl}/models`, { method: "GET", headers: { Authorization: `Bearer ${this.apiKey}`, }, }); return response.ok; } catch { return false; } } getRequiredCredentials() { return ["apiKey"]; } async _getVoices() { return MistralTTSClient.VOICES; } async _mapVoicesToUnified(rawVoices) { return rawVoices.map((voice) => ({ id: voice.id, name: voice.name, gender: voice.gender, languageCodes: [ { bcp47: voice.language || "en-US", iso639_3: (0, language_utils_1.toIso639_3)(voice.language || "en-US"), display: (0, language_utils_1.toLanguageDisplay)(voice.language || "en-US"), }, ], provider: "mistral", })); } async synthToBytes(text, options = {}) { const preparedText = await this.prepareText(text, options); const modelId = options.model || this.model; const voiceId = options.voice || this.voiceId; const body = { response_format: options.responseFormat || this.responseFormat, ...options.providerOptions, model: modelId, input: preparedText, }; if (voiceId) { body.voice_id = voiceId; } const response = await fetch(`${this.baseUrl}/audio/speech`, { method: "POST", headers: { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, }, body: JSON.stringify(body), }); if (!response.ok) { const errorText = await response.text(); throw new Error(`Mistral API error: ${response.status} ${response.statusText} - ${errorText}`); } const json = (await response.json()); this._createEstimatedWordTimings(preparedText); return (0, base64_utils_1.base64ToUint8Array)(json.audio_data); } async synthToBytestream(text, options = {}) { const preparedText = await this.prepareText(text, options); const modelId = options.model || this.model; const voiceId = options.voice || this.voiceId; const body = { response_format: options.responseFormat || this.responseFormat, ...options.providerOptions, model: modelId, input: preparedText, stream: true, }; if (voiceId) { body.voice_id = voiceId; } const response = await fetch(`${this.baseUrl}/audio/speech`, { method: "POST", headers: { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, Accept: "text/event-stream", }, body: JSON.stringify(body), }); if (!response.ok) { const errorText = await response.text(); throw new Error(`Mistral API error: ${response.status} ${response.statusText} - ${errorText}`); } if (!response.body) { const bytes = await this.synthToBytes(text, options); const readableStream = new ReadableStream({ start(controller) { controller.enqueue(bytes); controller.close(); }, }); return { audioStream: readableStream, wordBoundaries: [] }; } const sseStream = this.parseSseBase64Stream(response.body); return { audioStream: sseStream, wordBoundaries: [] }; } parseSseBase64Stream(body) { const reader = body.getReader(); const decoder = new TextDecoder(); let buffer = ""; return new ReadableStream({ async pull(controller) { while (true) { const { done, value } = await reader.read(); if (done) { controller.close(); return; } buffer += decoder.decode(value, { stream: true }); const lines = buffer.split("\n"); buffer = lines.pop() || ""; for (const line of lines) { if (!line.startsWith("data: ")) continue; const data = line.slice(6).trim(); if (!data || data === "[DONE]") continue; try { const json = JSON.parse(data); if (json.type === "speech.audio.delta" && typeof json.audio_data === "string") { controller.enqueue((0, base64_utils_1.base64ToUint8Array)(json.audio_data)); } } catch { /* skip malformed */ } } } }, }); } } exports.MistralTTSClient = MistralTTSClient; Object.defineProperty(MistralTTSClient, "VOICES", { enumerable: true, configurable: true, writable: true, value: [ { id: "Amalthea", name: "Amalthea", gender: "Unknown", language: "en-US" }, { id: "Achan", name: "Achan", gender: "Unknown", language: "en-US" }, { id: "Brave", name: "Brave", gender: "Unknown", language: "en-US" }, { id: "Contessa", name: "Contessa", gender: "Unknown", language: "en-US" }, { id: "Daintree", name: "Daintree", gender: "Unknown", language: "en-US" }, { id: "Eugora", name: "Eugora", gender: "Unknown", language: "en-US" }, { id: "Fornax", name: "Fornax", gender: "Unknown", language: "en-US" }, { id: "Griffin", name: "Griffin", gender: "Unknown", language: "en-US" }, { id: "Hestia", name: "Hestia", gender: "Unknown", language: "en-US" }, { id: "Irving", name: "Irving", gender: "Unknown", language: "en-US" }, { id: "Jasmine", name: "Jasmine", gender: "Unknown", language: "en-US" }, { id: "Kestra", name: "Kestra", gender: "Unknown", language: "en-US" }, { id: "Lorentz", name: "Lorentz", gender: "Unknown", language: "en-US" }, { id: "Mara", name: "Mara", gender: "Unknown", language: "en-US" }, { id: "Nettle", name: "Nettle", gender: "Unknown", language: "en-US" }, { id: "Orin", name: "Orin", gender: "Unknown", language: "en-US" }, { id: "Puck", name: "Puck", gender: "Unknown", language: "en-US" }, { id: "Quinn", name: "Quinn", gender: "Unknown", language: "en-US" }, { id: "Rune", name: "Rune", gender: "Unknown", language: "en-US" }, { id: "Simbe", name: "Simbe", gender: "Unknown", language: "en-US" }, { id: "Tertia", name: "Tertia", gender: "Unknown", language: "en-US" }, { id: "Umbriel", name: "Umbriel", gender: "Unknown", language: "en-US" }, { id: "Vesta", name: "Vesta", gender: "Unknown", language: "en-US" }, { id: "Wystan", name: "Wystan", gender: "Unknown", language: "en-US" }, { id: "Xeno", name: "Xeno", gender: "Unknown", language: "en-US" }, { id: "Yara", name: "Yara", gender: "Unknown", language: "en-US" }, { id: "Zephyr", name: "Zephyr", gender: "Unknown", language: "en-US" }, ] });