// @mastra/voice-sarvam
// Version: (not specified)
// Mastra Sarvam AI voice integration
// 206 lines (202 loc) • 5.25 kB
// JavaScript
import { PassThrough } from 'stream';
import { MastraVoice } from '@mastra/core/voice';
// src/index.ts
// src/voices.ts
// Speaker identifiers grouped under the "bulbul:v3" name. Order is preserved
// because SARVAM_VOICES is built by concatenating this list first.
var SARVAM_BULBUL_V3_SPEAKERS = [
  "shubh", "aditya", "ritu", "priya", "neha",
  "rahul", "pooja", "rohan", "simran", "kavya",
  "amit", "dev", "ishita", "shreya", "ratan",
  "varun", "manan", "sumit", "roopa", "kabir",
  "aayan", "ashutosh", "advait", "amelia", "sophia",
  "anand", "tanya", "tarun", "sunny", "mani",
  "gokul", "vijay", "shruti", "suhani", "mohit",
  "kavitha", "rehan", "soham", "rupali"
];
// Speaker identifiers grouped under the "bulbul:v2" name. "anushka" is the one
// the SarvamVoice constructor falls back to when the model is "bulbul:v2".
var SARVAM_BULBUL_V2_SPEAKERS = [
  "anushka", "manisha", "vidya", "arya",
  "abhilash", "karun", "hitesh"
];
// Every known speaker id: the v3 list followed by the v2 list, as a new array.
var SARVAM_VOICES = SARVAM_BULBUL_V3_SPEAKERS.concat(SARVAM_BULBUL_V2_SPEAKERS);
// src/index.ts
// Fallback text-to-speech settings used when the caller supplies no
// `speechModel` (or omits individual fields of it). The API key is read from
// the environment once, when this module is evaluated.
var defaultSpeechModel = {
  model: "bulbul:v3",
  apiKey: process.env["SARVAM_API_KEY"],
  language: "en-IN"
};
// Fallback speech-to-text settings used when the caller supplies no
// `listeningModel`. The API key is read from the environment once, when this
// module is evaluated.
var defaultListeningModel = {
  model: "saarika:v2.5",
  apiKey: process.env["SARVAM_API_KEY"]
};
/**
 * Mastra voice provider that talks to the Sarvam AI HTTP API: `speak()` posts
 * JSON to `/text-to-speech` and `listen()` posts multipart form data to
 * `/speech-to-text`.
 */
var SarvamVoice = class extends MastraVoice {
  /** API key sent in the `api-subscription-key` header of every request. */
  apiKey;
  /** Text-to-speech model name, sent as `model` in `/text-to-speech` payloads. */
  model = "bulbul:v3";
  /** Sent as `target_language_code` in `/text-to-speech` payloads. */
  language = "en-IN";
  /**
   * Extra fields spread LAST into the `/text-to-speech` payload, so a key here
   * overrides `text`, `speaker`, `model` or `target_language_code`.
   */
  properties = {};
  /** Speaker used by `speak()` when the call does not name one. */
  speaker = "shubh";
  /** Root URL every endpoint path is appended to. */
  baseUrl = "https://api.sarvam.ai";
  /** Speech-to-text model used by `listen()` when the call does not name one. */
  listeningModelName = defaultListeningModel.model;

  /**
   * @param {object} [config]
   * @param {object} [config.speechModel] Optional `model`, `apiKey`, `language` and `properties`.
   * @param {string} [config.speaker] Default speaker; when omitted, "anushka" is
   *   used for model "bulbul:v2" and "shubh" for any other model.
   * @param {object} [config.listeningModel] Optional `model` and `apiKey`.
   * @throws {Error} "SARVAM_API_KEY must be set" when no API key is found in
   *   `speechModel`, `listeningModel`, or the module-level default.
   */
  constructor({
    speechModel,
    speaker,
    listeningModel
  } = {}) {
    super({
      speechModel: {
        name: speechModel?.model ?? defaultSpeechModel.model,
        apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey
      },
      listeningModel: {
        name: listeningModel?.model ?? defaultListeningModel.model,
        apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey
      },
      speaker
    });
    // A single key is used for both endpoints; the speech model's key wins.
    this.apiKey = speechModel?.apiKey || listeningModel?.apiKey || defaultSpeechModel.apiKey;
    if (!this.apiKey) {
      throw new Error("SARVAM_API_KEY must be set");
    }
    this.model = speechModel?.model || defaultSpeechModel.model;
    this.language = speechModel?.language || defaultSpeechModel.language;
    this.properties = speechModel?.properties || {};
    // Remember the configured speech-to-text model so listen() honours it
    // instead of always falling back to a hard-coded name.
    this.listeningModelName = listeningModel?.model || defaultListeningModel.model;
    const defaultSpeaker = this.model === "bulbul:v2" ? "anushka" : "shubh";
    this.speaker = speaker || defaultSpeaker;
  }

  /**
   * Drains an async-iterable stream into one Buffer. String chunks are
   * converted with `Buffer.from`; all other chunks are passed to
   * `Buffer.concat` unchanged.
   *
   * @param {AsyncIterable<string | Buffer>} stream
   * @returns {Promise<Buffer>}
   */
  async readAllBytes(stream) {
    const chunks = [];
    for await (const chunk of stream) {
      chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
    }
    return Buffer.concat(chunks);
  }

  /**
   * Picks a human-readable message out of a failed response. Falls back to
   * `response.statusText` when the body is not JSON or carries no message.
   *
   * @param {Response} response A response whose body has not been consumed yet.
   * @returns {Promise<string>}
   */
  async readErrorMessage(response) {
    try {
      const body = await response.json();
      // NOTE(review): the nested `error.message` lookup assumes Sarvam wraps
      // errors as `{ error: { message } }` — confirm against the API reference.
      return body?.message || body?.error?.message || response.statusText;
    } catch {
      return response.statusText;
    }
  }

  /**
   * POSTs `payload` as JSON to `baseUrl + endpoint`.
   *
   * @param {string} endpoint Path beginning with "/", e.g. "/text-to-speech".
   * @param {object} payload Value serialised with `JSON.stringify`.
   * @returns {Promise<Response>} The response, only when `response.ok` is true.
   * @throws {Error} "Sarvam AI API Error: <message>" on a non-OK response.
   */
  async makeRequest(endpoint, payload) {
    const headers = new Headers({
      "api-subscription-key": this.apiKey,
      "Content-Type": "application/json"
    });
    const response = await fetch(`${this.baseUrl}${endpoint}`, {
      method: "POST",
      headers,
      body: JSON.stringify(payload)
    });
    if (!response.ok) {
      throw new Error(`Sarvam AI API Error: ${await this.readErrorMessage(response)}`);
    }
    return response;
  }

  /**
   * Reads an entire stream and decodes it as UTF-8 text.
   *
   * @param {AsyncIterable<string | Buffer>} stream
   * @returns {Promise<string>}
   */
  async streamToString(stream) {
    const bytes = await this.readAllBytes(stream);
    return bytes.toString("utf-8");
  }

  /**
   * Converts text to speech via `/text-to-speech`.
   *
   * @param {string | AsyncIterable<string | Buffer>} input Text, or a stream
   *   that is fully read and decoded as UTF-8 before the request is made.
   * @param {object} [options]
   * @param {string} [options.speaker] Overrides the instance speaker for this call.
   * @returns {Promise<PassThrough>} An already-ended stream holding the
   *   base64-decoded first entry of the response's `audios` array.
   * @throws {Error} "No audio received from Sarvam AI" when `audios` is
   *   missing or empty; also whatever `makeRequest` throws.
   */
  async speak(input, options) {
    const text = typeof input === "string" ? input : await this.streamToString(input);
    const payload = {
      text,
      target_language_code: this.language,
      speaker: options?.speaker || this.speaker,
      model: this.model,
      ...this.properties
    };
    const response = await this.makeRequest("/text-to-speech", payload);
    const { audios } = await response.json();
    if (!audios || !audios.length) {
      throw new Error("No audio received from Sarvam AI");
    }
    // Only the first returned audio is used.
    const audioBuffer = Buffer.from(audios[0], "base64");
    const stream = new PassThrough();
    stream.end(audioBuffer);
    return stream;
  }

  /**
   * Lists every speaker id in SARVAM_VOICES, regardless of the configured model.
   *
   * @returns {Promise<Array<{ voiceId: string }>>}
   */
  async getSpeakers() {
    return SARVAM_VOICES.map((voice) => ({
      voiceId: voice
    }));
  }

  /**
   * Checks if listening capabilities are enabled.
   *
   * @returns {Promise<{ enabled: boolean }>} Always `{ enabled: true }`.
   */
  async getListener() {
    return { enabled: true };
  }

  /**
   * Transcribes audio via `/speech-to-text`.
   *
   * @param {AsyncIterable<string | Buffer>} input Audio stream; read fully
   *   into memory before upload.
   * @param {object} [options]
   * @param {string} [options.filetype] "mp3" uploads as "audio/mpeg"; anything
   *   else (including omitted) uploads as "audio/wav".
   * @param {string} [options.model] Overrides the configured listening model.
   * @param {string} [options.languageCode] Sent as `language_code`; defaults to "unknown".
   * @param {string} [options.mode] Sent as `mode` only when truthy.
   * @returns {Promise<string>} The `transcript` field of the JSON response.
   * @throws {Error} "Sarvam AI API Error: <message>" on a non-OK response;
   *   network and JSON-parse failures are logged and rethrown unchanged.
   */
  async listen(input, options) {
    const audioBuffer = await this.readAllBytes(input);
    const mimeType = options?.filetype === "mp3" ? "audio/mpeg" : "audio/wav";
    const form = new FormData();
    form.append("file", new Blob([audioBuffer], { type: mimeType }));
    form.append("model", options?.model || this.listeningModelName);
    form.append("language_code", options?.languageCode || "unknown");
    if (options?.mode) {
      form.append("mode", options.mode);
    }
    // No explicit Content-Type header: fetch derives the multipart boundary
    // from the FormData body.
    const requestOptions = {
      method: "POST",
      headers: {
        "api-subscription-key": this.apiKey
      },
      body: form
    };
    try {
      const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);
      // Surface HTTP failures instead of returning an undefined transcript.
      if (!response.ok) {
        throw new Error(`Sarvam AI API Error: ${await this.readErrorMessage(response)}`);
      }
      const result = await response.json();
      return result.transcript;
    } catch (error) {
      console.error("Error during speech-to-text request:", error);
      throw error;
    }
  }
};
export { SarvamVoice };
//# sourceMappingURL=index.js.map
//# sourceMappingURL=index.js.map