UNPKG

@mastra/voice-sarvam

Version:

Mastra Sarvam AI voice integration

mastra-ai/mastra

206 lines (202 loc) • 5.25 kB

JavaScript

import { PassThrough } from 'stream';
import { MastraVoice } from '@mastra/core/voice';

// src/voices.ts

/** Speaker ids grouped under the "bulbul:v3" speech model. */
const SARVAM_BULBUL_V3_SPEAKERS = [
  "shubh",
  "aditya",
  "ritu",
  "priya",
  "neha",
  "rahul",
  "pooja",
  "rohan",
  "simran",
  "kavya",
  "amit",
  "dev",
  "ishita",
  "shreya",
  "ratan",
  "varun",
  "manan",
  "sumit",
  "roopa",
  "kabir",
  "aayan",
  "ashutosh",
  "advait",
  "amelia",
  "sophia",
  "anand",
  "tanya",
  "tarun",
  "sunny",
  "mani",
  "gokul",
  "vijay",
  "shruti",
  "suhani",
  "mohit",
  "kavitha",
  "rehan",
  "soham",
  "rupali"
];

/** Speaker ids grouped under the "bulbul:v2" speech model. */
const SARVAM_BULBUL_V2_SPEAKERS = [
  "anushka",
  "manisha",
  "vidya",
  "arya",
  "abhilash",
  "karun",
  "hitesh"
];

/** Every speaker id reported by `getSpeakers()`, v3 first and then v2. */
const SARVAM_VOICES = [...SARVAM_BULBUL_V3_SPEAKERS, ...SARVAM_BULBUL_V2_SPEAKERS];

// src/index.ts

/** Text-to-speech settings used when the caller does not supply a `speechModel`. */
const defaultSpeechModel = {
  model: "bulbul:v3",
  apiKey: process.env.SARVAM_API_KEY,
  language: "en-IN"
};

/** Speech-to-text settings used when the caller does not supply a `listeningModel`. */
const defaultListeningModel = {
  model: "saarika:v2.5",
  apiKey: process.env.SARVAM_API_KEY
};

/**
 * Drains an async-iterable stream into a single Buffer.
 * String chunks are converted with `Buffer.from`; any other chunk is passed
 * to `Buffer.concat` unchanged.
 *
 * @param {AsyncIterable<string | Buffer>} stream - Source to drain.
 * @returns {Promise<Buffer>} The concatenated chunks.
 */
async function collectChunks(stream) {
  const chunks = [];
  for await (const chunk of stream) {
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
  }
  return Buffer.concat(chunks);
}

/**
 * Extracts a human-readable message from a failed HTTP response.
 * Falls back to `response.statusText` when the body is not JSON or carries
 * no message.
 *
 * @param {Response} response - A fetch response whose `ok` flag is false.
 * @returns {Promise<string>} The message to embed in the thrown error.
 */
async function readErrorMessage(response) {
  try {
    const body = await response.json();
    // NOTE(review): assumes the API may nest details as `{ error: { message } }`;
    // a top-level `message` is still honoured. Confirm against the Sarvam API docs.
    return body?.error?.message || body?.message || response.statusText;
  } catch {
    return response.statusText;
  }
}

/**
 * Mastra voice provider backed by the Sarvam AI HTTP API.
 * `speak()` posts to `/text-to-speech` and `listen()` posts to `/speech-to-text`.
 */
class SarvamVoice extends MastraVoice {
  apiKey;
  model = "bulbul:v3";
  language = "en-IN";
  properties = {};
  speaker = "shubh";
  baseUrl = "https://api.sarvam.ai";

  /**
   * @param {object} [config]
   * @param {object} [config.speechModel] - May carry `model`, `apiKey`, `language`
   *   and `properties` (extra fields spread into every text-to-speech payload).
   * @param {string} [config.speaker] - Default speaker id; when omitted it is
   *   "anushka" for "bulbul:v2" and "shubh" otherwise.
   * @param {object} [config.listeningModel] - May carry `model` and `apiKey`.
   * @throws {Error} When no API key is found in the config or in `SARVAM_API_KEY`.
   */
  constructor({ speechModel, speaker, listeningModel } = {}) {
    super({
      speechModel: {
        name: speechModel?.model ?? defaultSpeechModel.model,
        apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey
      },
      listeningModel: {
        name: listeningModel?.model ?? defaultListeningModel.model,
        apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey
      },
      speaker
    });

    // One key is used for both endpoints; `||` lets an empty string fall
    // through to the next candidate.
    this.apiKey = speechModel?.apiKey || listeningModel?.apiKey || defaultSpeechModel.apiKey;
    if (!this.apiKey) {
      throw new Error("SARVAM_API_KEY must be set");
    }

    this.model = speechModel?.model || defaultSpeechModel.model;
    this.language = speechModel?.language || defaultSpeechModel.language;
    this.properties = speechModel?.properties || {};

    const defaultSpeaker = this.model === "bulbul:v2" ? "anushka" : "shubh";
    this.speaker = speaker || defaultSpeaker;
  }

  /**
   * POSTs a JSON payload to `baseUrl + endpoint`.
   *
   * @param {string} endpoint - Path appended to `baseUrl`, e.g. "/text-to-speech".
   * @param {object} payload - Value serialised with `JSON.stringify` as the body.
   * @returns {Promise<Response>} The response, only when its `ok` flag is true.
   * @throws {Error} "Sarvam AI API Error: ..." when the response is not ok.
   */
  async makeRequest(endpoint, payload) {
    const headers = new Headers({
      "api-subscription-key": this.apiKey,
      "Content-Type": "application/json"
    });
    const response = await fetch(`${this.baseUrl}${endpoint}`, {
      method: "POST",
      headers,
      body: JSON.stringify(payload)
    });
    if (!response.ok) {
      throw new Error(`Sarvam AI API Error: ${await readErrorMessage(response)}`);
    }
    return response;
  }

  /**
   * Reads a whole stream and decodes it as UTF-8 text.
   *
   * @param {AsyncIterable<string | Buffer>} stream - Source to drain.
   * @returns {Promise<string>} The decoded contents.
   */
  async streamToString(stream) {
    const buffer = await collectChunks(stream);
    return buffer.toString("utf-8");
  }

  /**
   * Converts text to speech.
   *
   * @param {string | AsyncIterable<string | Buffer>} input - Text, or a stream
   *   that is fully read and decoded as UTF-8 first.
   * @param {object} [options]
   * @param {string} [options.speaker] - Overrides the instance speaker for this call.
   * @returns {Promise<PassThrough>} Stream holding the base64-decoded first
   *   entry of the response's `audios` array.
   * @throws {Error} On an API error, or when the response has no audio.
   */
  async speak(input, options) {
    const text = typeof input === "string" ? input : await this.streamToString(input);
    const payload = {
      text,
      target_language_code: this.language,
      speaker: options?.speaker || this.speaker,
      model: this.model,
      // Spread last, so user-supplied properties can override the fields above.
      ...this.properties
    };

    const response = await this.makeRequest("/text-to-speech", payload);
    const { audios } = await response.json();
    if (!audios || !audios.length) {
      throw new Error("No audio received from Sarvam AI");
    }

    const audioBuffer = Buffer.from(audios[0], "base64");
    const stream = new PassThrough();
    stream.end(audioBuffer);
    return stream;
  }

  /**
   * Lists the known speaker ids.
   *
   * @returns {Promise<Array<{ voiceId: string }>>}
   */
  async getSpeakers() {
    return SARVAM_VOICES.map((voice) => ({ voiceId: voice }));
  }

  /**
   * Checks if listening capabilities are enabled.
   *
   * @returns {Promise<{ enabled: boolean }>}
   */
  async getListener() {
    return { enabled: true };
  }

  /**
   * Transcribes audio via the speech-to-text endpoint.
   *
   * @param {AsyncIterable<string | Buffer>} input - Audio stream; it is fully
   *   buffered before upload.
   * @param {object} [options]
   * @param {string} [options.filetype] - "mp3" uploads as "audio/mpeg"; anything
   *   else uploads as "audio/wav".
   * @param {string} [options.model] - Defaults to "saarika:v2.5".
   * @param {string} [options.languageCode] - Defaults to "unknown".
   * @param {string} [options.mode] - Sent as the `mode` form field when truthy.
   * @returns {Promise<string>} The `transcript` field of the JSON response.
   * @throws {Error} "Sarvam AI API Error: ..." when the response is not ok;
   *   network and JSON-parsing errors are logged and rethrown.
   */
  async listen(input, options) {
    const audioBuffer = await collectChunks(input);
    const mimeType = options?.filetype === "mp3" ? "audio/mpeg" : "audio/wav";

    const form = new FormData();
    form.append("file", new Blob([audioBuffer], { type: mimeType }));
    form.append("model", options?.model || defaultListeningModel.model);
    form.append("language_code", options?.languageCode || "unknown");
    if (options?.mode) {
      form.append("mode", options.mode);
    }

    // No Content-Type header here: fetch derives the multipart boundary from
    // the FormData body.
    const requestOptions = {
      method: "POST",
      headers: { "api-subscription-key": this.apiKey },
      body: form
    };

    try {
      const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);
      // Without this check an HTTP error body would be parsed as a success
      // and the method would return `undefined`.
      if (!response.ok) {
        throw new Error(`Sarvam AI API Error: ${await readErrorMessage(response)}`);
      }
      const result = await response.json();
      return result.transcript;
    } catch (error) {
      console.error("Error during speech-to-text request:", error);
      throw error;
    }
  }
}

export { SarvamVoice };
//# sourceMappingURL=index.js.map