js-tts-wrapper
Version:
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
246 lines (245 loc) • 9.14 kB
JavaScript
import { AbstractTTSClient } from "../core/abstract-tts.js";
import * as SSMLUtils from "../core/ssml-utils.js";
import * as SpeechMarkdown from "../markdown/converter.js";
import { getFetch } from "../utils/fetch-utils.js";
import { toIso639_3, toLanguageDisplay } from "../utils/language-utils.js";
const fetch = getFetch();
/**
 * Text-to-speech client for the Hume AI HTTP API.
 *
 * Text is sent as a single utterance to `${baseUrl}/tts/file` (buffered) or
 * `${baseUrl}/tts/stream/file` (streamed). SSML and Speech Markdown input is
 * reduced to plain text before it is sent.
 */
export class HumeTTSClient extends AbstractTTSClient {
  /**
   * @param {object} [credentials]
   * @param {string} [credentials.apiKey] API key; falls back to the
   *   `HUME_API_KEY` environment variable, then to an empty string.
   * @param {string} [credentials.baseURL] Overrides the default API root.
   * @param {string} [credentials.model] Model id; defaults to "octave-2".
   * @param {object|string} [credentials.properties] Properties (object or JSON
   *   text) applied through `setProperty`; `propertiesJson` and
   *   `propertiesJSON` are accepted as alternative keys.
   */
  constructor(credentials = {}) {
    super(credentials);
    // Define the fields as own, enumerable, writable data properties before
    // assigning them, mirroring class-field semantics.
    for (const field of ["apiKey", "baseUrl", "model"]) {
      Object.defineProperty(this, field, {
        enumerable: true,
        configurable: true,
        writable: true,
        value: undefined,
      });
    }
    // `process` is not defined in every runtime (e.g. browsers); guard the
    // lookup so constructing a client without an explicit key cannot throw.
    const envApiKey = typeof process !== "undefined" ? process.env?.HUME_API_KEY : undefined;
    this.apiKey = credentials.apiKey || envApiKey || "";
    this.baseUrl = credentials.baseURL || "https://api.hume.ai/v0";
    this.model = credentials.model || "octave-2";
    this.voiceId = "aac4caff-e2e1-4088-9d58-a29c5d22dce6";
    this._models = [
      { id: "octave-2", features: ["streaming", "inline-voice-cloning"] },
      { id: "octave-1", features: ["streaming"] },
    ];
    this.sampleRate = 24000;
    this.applyCredentialProperties(credentials);
  }

  /**
   * Applies `credentials.properties` (or `propertiesJson` / `propertiesJSON`)
   * by calling `setProperty` for every entry.
   *
   * Best-effort: JSON text that does not parse, and values that are not
   * objects, are ignored.
   *
   * @param {object} credentials Credentials passed to the constructor.
   */
  applyCredentialProperties(credentials) {
    const rawProps =
      credentials.properties ?? credentials.propertiesJson ?? credentials.propertiesJSON;
    if (!rawProps) {
      return;
    }
    let parsed = rawProps;
    if (typeof rawProps === "string") {
      try {
        parsed = JSON.parse(rawProps);
      } catch {
        // Deliberately ignored: malformed property JSON must not block construction.
        return;
      }
    }
    // Only objects carry key/value properties; a JSON string primitive would
    // otherwise be iterated character by character.
    if (parsed === null || typeof parsed !== "object") {
      return;
    }
    for (const [key, value] of Object.entries(parsed)) {
      this.setProperty(key, value);
    }
  }

  /**
   * Maps a model id to the `version` value sent in the request body.
   *
   * @param {string} modelId
   * @returns {string|undefined} "2" for "octave-2", "1" for "octave-1",
   *   otherwise undefined (no `version` field is sent).
   */
  resolveVersion(modelId) {
    switch (modelId) {
      case "octave-2":
        return "2";
      case "octave-1":
        return "1";
      default:
        return undefined;
    }
  }

  /**
   * Reduces the input to plain text: Speech Markdown (when
   * `options.useSpeechMarkdown` is set and the text is detected as such) is
   * converted to SSML, and any SSML is then stripped.
   *
   * @param {string} text
   * @param {object} [options]
   * @returns {Promise<string>}
   */
  async prepareText(text, options) {
    let processedText = text;
    if (options?.useSpeechMarkdown && SpeechMarkdown.isSpeechMarkdown(processedText)) {
      const ssml = await SpeechMarkdown.toSSML(processedText, "w3c");
      processedText = SSMLUtils.stripSSML(ssml);
    }
    if (SSMLUtils.isSSML(processedText)) {
      processedText = SSMLUtils.stripSSML(processedText);
    }
    return processedText;
  }

  /** @param {string} model Model id used for subsequent requests. */
  setModel(model) {
    this.model = model;
  }

  /** @param {string} voiceId Voice id or name used for subsequent requests. */
  setVoice(voiceId) {
    this.voiceId = voiceId;
  }

  /**
   * Reads "model" and "voice" from this client; every other property is
   * delegated to the base class.
   *
   * @param {string} property
   */
  getProperty(property) {
    switch (property) {
      case "model":
        return this.model;
      case "voice":
        return this.voiceId;
      default:
        return super.getProperty(property);
    }
  }

  /**
   * Writes "model" and "voice" on this client; every other property is
   * delegated to the base class.
   *
   * @param {string} property
   * @param {*} value
   */
  setProperty(property, value) {
    switch (property) {
      case "model":
        this.setModel(value);
        break;
      case "voice":
        this.setVoice(value);
        break;
      default:
        super.setProperty(property, value);
        break;
    }
  }

  /**
   * Probes the API key by POSTing a one-utterance request to `/tts/file`.
   *
   * @returns {Promise<boolean>} false when no key is set, when the request
   *   throws, or when the response status is 401 or 403; true otherwise.
   */
  async checkCredentials() {
    if (!this.apiKey) {
      return false;
    }
    try {
      const response = await fetch(`${this.baseUrl}/tts/file`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "X-Hume-Api-Key": this.apiKey,
        },
        body: JSON.stringify({ utterances: [{ text: "t" }] }),
      });
      return response.status !== 401 && response.status !== 403;
    } catch {
      return false;
    }
  }

  /** @returns {string[]} Names of the credential fields this client needs. */
  getRequiredCredentials() {
    return ["apiKey"];
  }

  /** @returns {Promise<object[]>} The static `HumeTTSClient.VOICES` list. */
  async _getVoices() {
    return HumeTTSClient.VOICES;
  }

  /**
   * Converts raw voice records into the unified voice shape.
   * A missing `language` is treated as "en-US".
   *
   * @param {object[]} rawVoices
   * @returns {Promise<object[]>}
   */
  async _mapVoicesToUnified(rawVoices) {
    return rawVoices.map((voice) => {
      const language = voice.language || "en-US";
      return {
        id: voice.id,
        name: voice.name,
        gender: voice.gender,
        languageCodes: [
          {
            bcp47: language,
            iso639_3: toIso639_3(language),
            display: toLanguageDisplay(language),
          },
        ],
        provider: "hume",
      };
    });
  }

  /**
   * Builds the utterance `voice` reference. UUID-shaped values are sent as
   * `id`, anything else as `name`, since the API addresses a voice by one or
   * the other.
   *
   * @param {*} voiceId
   * @returns {object}
   */
  _humeVoiceRef(voiceId) {
    const uuidPattern = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
    const key = typeof voiceId === "string" && uuidPattern.test(voiceId) ? "id" : "name";
    return { [key]: voiceId, provider: "HUME_AI" };
  }

  /**
   * Builds the JSON body shared by the buffered and streaming endpoints.
   * `options.providerOptions` is spread first, so `utterances` and `version`
   * set here take precedence over same-named provider options.
   *
   * @param {string} preparedText Plain text to synthesize.
   * @param {object} options Per-call options (`voice`, `model`, `providerOptions`).
   * @returns {object}
   */
  _humeRequestBody(preparedText, options) {
    const utterance = { text: preparedText };
    const voiceId = options.voice || this.voiceId;
    if (voiceId) {
      utterance.voice = this._humeVoiceRef(voiceId);
    }
    const body = {
      ...options.providerOptions,
      utterances: [utterance],
    };
    const version = this.resolveVersion(options.model || this.model);
    if (version != null) {
      body.version = version;
    }
    return body;
  }

  /**
   * POSTs a JSON body to an API path and returns the successful response.
   *
   * @param {string} path Path appended to `baseUrl`.
   * @param {object} body Request payload.
   * @returns {Promise<Response>}
   * @throws {Error} When the response is not ok; the message carries the
   *   status, status text and response text.
   */
  async _humePost(path, body) {
    const response = await fetch(`${this.baseUrl}${path}`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "X-Hume-Api-Key": this.apiKey,
      },
      body: JSON.stringify(body),
    });
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Hume API error: ${response.status} ${response.statusText} - ${errorText}`);
    }
    return response;
  }

  /**
   * Synthesizes text through `/tts/file` and returns the whole audio payload.
   * Also calls `_createEstimatedWordTimings` with the prepared text.
   *
   * @param {string} text
   * @param {object} [options] `voice`, `model`, `providerOptions`, `useSpeechMarkdown`.
   * @returns {Promise<Uint8Array>}
   * @throws {Error} When the API responds with a non-ok status.
   */
  async synthToBytes(text, options = {}) {
    const preparedText = await this.prepareText(text, options);
    const response = await this._humePost(
      "/tts/file",
      this._humeRequestBody(preparedText, options),
    );
    const arrayBuffer = await response.arrayBuffer();
    this._createEstimatedWordTimings(preparedText);
    return new Uint8Array(arrayBuffer);
  }

  /**
   * Synthesizes text through `/tts/stream/file`.
   *
   * @param {string} text
   * @param {object} [options] `voice`, `model`, `providerOptions`, `useSpeechMarkdown`.
   * @returns {Promise<{audioStream: ReadableStream, wordBoundaries: Array}>}
   *   `wordBoundaries` is always an empty array.
   * @throws {Error} When the API responds with a non-ok status.
   */
  async synthToBytestream(text, options = {}) {
    const preparedText = await this.prepareText(text, options);
    const response = await this._humePost(
      "/tts/stream/file",
      this._humeRequestBody(preparedText, options),
    );
    if (response.body) {
      return { audioStream: response.body, wordBoundaries: [] };
    }
    // No body stream on the response: buffer the payload and wrap it in a
    // single-chunk stream so callers still receive a ReadableStream.
    const audioData = new Uint8Array(await response.arrayBuffer());
    const readableStream = new ReadableStream({
      start(controller) {
        controller.enqueue(audioData);
        controller.close();
      },
    });
    return { audioStream: readableStream, wordBoundaries: [] };
  }
}
// Static voice catalogue attached to the class as a writable, enumerable,
// configurable data property. Every entry has gender "Unknown" and language
// "en-US", so the list is expanded from compact [id, name] pairs.
Object.defineProperty(HumeTTSClient, "VOICES", {
  value: [
    ["ito", "Ito"],
    ["acantha", "Acantha"],
    // NOTE(review): unlike the other ids this one contains spaces — confirm it is intended.
    ["ant ai gonus", "Antigonos"],
    ["ari", "Ari"],
    ["brant", "Brant"],
    ["daniel", "Daniel"],
    ["fin", "Fin"],
    ["hype", "Hype"],
    ["kora", "Kora"],
    ["mango", "Mango"],
    ["marek", "Marek"],
    ["ogma", "Ogma"],
    ["sora", "Sora"],
    ["terrence", "Terrence"],
    ["vitor", "Vitor"],
    ["zach", "Zach"],
  ].map(([id, name]) => ({ id, name, gender: "Unknown", language: "en-US" })),
  writable: true,
  configurable: true,
  enumerable: true,
});