UNPKG

@mastra/voice-sarvam

Version:

Mastra Sarvam AI voice integration

1 lines 15.2 kB
{"version":3,"sources":["../src/voices.ts","../src/index.ts"],"names":["MastraVoice","stream","PassThrough"],"mappings":";;;;;;;;AACO,IAAM,yBAAA,GAA4B;AAAA,EACvC,OAAA;AAAA,EACA,QAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,QAAA;AAAA,EACA,OAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA;AAAA,EACA,QAAA;AAAA,EACA,QAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,UAAA;AAAA,EACA,QAAA;AAAA,EACA,QAAA;AAAA,EACA,QAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,QAAA;AAAA,EACA,QAAA;AAAA,EACA,OAAA;AAAA,EACA,SAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA;AACF,CAAA;AAGO,IAAM,yBAAA,GAA4B;AAAA,EACvC,SAAA;AAAA,EACA,SAAA;AAAA,EACA,OAAA;AAAA,EACA,MAAA;AAAA,EACA,UAAA;AAAA,EACA,OAAA;AAAA,EACA;AACF,CAAA;AAKO,IAAM,aAAA,GAAgB,CAAC,GAAG,yBAAA,EAA2B,GAAG,yBAAyB,CAAA;;;ACXxF,IAAM,kBAAA,GAAqB;AAAA,EACzB,KAAA,EAAO,WAAA;AAAA,EACP,MAAA,EAAQ,QAAQ,GAAA,CAAI,cAAA;AAAA,EACpB,QAAA,EAAU;AACZ,CAAA;AAEA,IAAM,qBAAA,GAAwB;AAAA,EAC5B,KAAA,EAAO,cAAA;AAAA,EACP,MAAA,EAAQ,QAAQ,GAAA,CAAI,cAEtB,CAAA;AAEO,IAAM,WAAA,GAAN,cAA0BA,iBAAA,CAAY;AAAA,EACnC,MAAA;AAAA,EACA,KAAA,GAAwB,WAAA;AAAA,EACxB,QAAA,GAA8B,OAAA;AAAA,EAC9B,aAAkC,EAAC;AAAA,EAC3C,OAAA,GAAyB,OAAA;AAAA,EACjB,OAAA,GAAU,uBAAA;AAAA,EAElB,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,OAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,KAAA,IAAS,kBAAA,CAAmB,KAAA;AAAA,QAC/C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,KAAA,IAAS,qBAAA,CAAsB,KAAA;AAAA,QACrD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,OAC1D;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAA,CAAK,MAAA,GAAS,WAAA,EAAa,MAAA,IAAU,cAAA,EAAgB,UAAU,kBAAA,CAAmB,MAAA;AAClF,IAAA,IAAI,CAAC,KAAK,MAAA,EAAQ;AAChB,MAAA,MAAM,IAAI,MAAM,4BAA4B,CAAA;AAAA,IAC9C;AACA,IAAA,IAAA,CAAK,KAAA,GAAQ,WAAA,EAAa,KAAA,IAAS,kBAAA,CAAmB,KAAA;AACtD,IAAA,IAAA,CAAK,QAAA,GAAW,WAAA,EAAa,QAAA,IAAY,kBAAA,CAAmB,QAAA;AAC5D,IAAA,IAAA,CAAK,UAAA,GAAa,WAAA,EAAa,UAAA,IAAc,EAAC;AAG9C,IAAA,MAAM,cAAA,GAAgC,IAAA,CAAK,KAAA,KAAU,WAAA,GAAc,SAAA,GAAY,OAAA;AAC/E,IAAA,IAAA,CAAK,UAAU,OAAA,IAAW,cAAA;AAAA,EAC5B;AAAA,EAEA,MAAc,WAAA,CAAY,QAAA,EAAkB,OAAA,EAAc;AACxD,IAAA,MAAM,OAAA,GAAU,IAAI,OAAA,CAAQ;AAAA,MAC1B,wBAAwB,IAAA,CAAK,MAAA;AAAA,MAC7B,cAAA,EAAgB;AAAA,KACjB,CAAA;AACD,IAAA,MAAM,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,KAAK,OAAO,CAAA,EAAG,QAAQ,CAAA,CAAA,EAAI;AAAA,MACzD,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA;AAAA,MACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,OAAO;AAAA,KAC7B,CAAA;AACD,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,IAAI,YAAA;AACJ,MAAA,IAAI;AACF,QAAA,MAAM,KAAA,GAAS,MAAM,QAAA,CAAS,IAAA,EAAK;AACnC,QAAA,YAAA,GAAe,KAAA,CAAM,WAAW,QAAA,CAAS,UAAA;AAAA,MAC3C,CAAA,CAAA,MAAQ;AACN,QAAA,YAAA,GAAe,QAAA,CAAS,UAAA;AAAA,MAC1B;AACA,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,qBAAA,EAAwB,YAAY,CAAA,CAAE,CAAA;AAAA,IACxD;AAEA,IAAA,OAAO,QAAA;AAAA,EACT;AAAA,EACA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AAAA,EACA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EACgC;AAChC,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,MAAM,OAAA,GAAU;AAAA,MACd,IAAA;AAAA,MACA,sBAAsB,IAAA,CAAK,QAAA;AAAA,MAC3B,OAAA,EAAS,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AAAA,MAClC,OAAO,IAAA,CAAK,KAAA;AAAA,MACZ,GAAG,IAAA,CAAK;AAAA,KACV;AAEA,IAAA,MAAM,QAAA,GAAW,MAAM,IAAA,CAAK,WAAA,CAAY,mBAAmB,OAAO,CAAA;AAElE,IAAA,MAAM,EAAE,MAAA,EAAO,GAAK,MAAM,SAAS,IAAA,EAAK;AAExC,IAAA,IAAI,CAAC,MAAA,IAAU,CAAC,MAAA,CAAO,MAAA,EAAQ;AAC7B,MAAA,MAAM,IAAI,MAAM,kCAAkC,CAAA;AAAA,IACpD;AAGA,IAAA,MAAM,cAAc,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,CAAC,GAAG,QAAQ,CAAA;AAGnD,IAAA,MAAMC,QAAA,GAAS,IAAIC,kBAAA,EAAY;AAC/B,IAAAD,QAAA,CAAO,MAAM,WAAW,CAAA;AACxB,IAAAA,QAAA,CAAO,GAAA,EAAI;AAEX,IAAA,OAAOA,QAAA;AAAA,EACT;AAAA,EAEA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,aAAA,CAAc,IAAI,CAAA,KAAA,MAAU;AAAA,MACjC,OAAA,EAAS;AAAA,KACX,CAAE,CAAA;AAAA,EACJ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA,EAEA,MAAM,MAAA,CAAO,KAAA,EAA8B,OAAA,EAAgD;AAEzF,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,IAAA,MAAM,IAAA,GAAO,IAAI,QAAA,EAAS;AAC1B,IAAA,MAAM,QAAA,GAAW,OAAA,EAAS,QAAA,KAAa,KAAA,GAAQ,YAAA,GAAe,WAAA;AAC9D,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,EAAE,IAAA,EAAM,QAAA,EAAU,CAAA;AAEvD,IAAA,IAAA,CAAK,MAAA,CAAO,QAAQ,IAAI,CAAA;AACxB,IAAA,IAAA,CAAK,MAAA,CAAO,OAAA,EAAS,OAAA,EAAS,KAAA,IAAS,cAAc,CAAA;AACrD,IAAA,IAAA,CAAK,MAAA,CAAO,eAAA,EAAiB,OAAA,EAAS,YAAA,IAAgB,SAAS,CAAA;AAE/D,IAAA,IAAI,SAAS,IAAA,EAAM;AACjB,MAAA,IAAA,CAAK,MAAA,CAAO,MAAA,EAAQ,OAAA,CAAQ,IAAI,CAAA;AAAA,IAClC;AACA,IAAA,MAAM,cAAA,GAAiB;AAAA,MACrB,MAAA,EAAQ,MAAA;AAAA,MACR,OAAA,EAAS;AAAA,QACP,wBAAwB,IAAA,CAAK;AAAA,OAC/B;AAAA,MACA,IAAA,EAAM;AAAA,KACR;AAEA,IAAA,IAAI;AACF,MAAA,MAAM,WAAW,MAAM,KAAA,CAAM,GAAG,IAAA,CAAK,OAAO,mBAAmB,cAAc,CAAA;AAC7E,MAAA,MAAM,MAAA,GAAU,MAAM,QAAA,CAAS,IAAA,EAAK;AACpC,MAAA,OAAO,MAAA,CAAO,UAAA;AAAA,IAChB,SAAS,KAAA,EAAO;AACd,MAAA,OAAA,CAAQ,KAAA,CAAM,wCAAwC,KAAK,CAAA;AAC3D,MAAA,MAAM,KAAA;AAAA,IACR;AAAA,EACF;AACF","file":"index.cjs","sourcesContent":["// Speakers available for bulbul:v3 (39 voices)\nexport const SARVAM_BULBUL_V3_SPEAKERS = [\n 'shubh',\n 'aditya',\n 'ritu',\n 'priya',\n 'neha',\n 'rahul',\n 'pooja',\n 'rohan',\n 'simran',\n 'kavya',\n 'amit',\n 'dev',\n 'ishita',\n 'shreya',\n 'ratan',\n 'varun',\n 'manan',\n 'sumit',\n 'roopa',\n 'kabir',\n 'aayan',\n 'ashutosh',\n 'advait',\n 'amelia',\n 'sophia',\n 'anand',\n 'tanya',\n 'tarun',\n 'sunny',\n 'mani',\n 'gokul',\n 'vijay',\n 'shruti',\n 'suhani',\n 'mohit',\n 'kavitha',\n 'rehan',\n 'soham',\n 'rupali',\n] as const;\n\n// Speakers available for bulbul:v2 (7 voices, no overlap with v3)\nexport const SARVAM_BULBUL_V2_SPEAKERS = [\n 'anushka',\n 'manisha',\n 'vidya',\n 'arya',\n 'abhilash',\n 'karun',\n 'hitesh',\n] as const;\n\n// Combined list of all Sarvam speakers across supported bulbul models.\n// bulbul:v1 speakers (meera, pavithra, …) have been removed as Sarvam\n// deprecated bulbul:v1 — use bulbul:v2 or bulbul:v3 instead.\nexport const SARVAM_VOICES = [...SARVAM_BULBUL_V3_SPEAKERS, ...SARVAM_BULBUL_V2_SPEAKERS] as const;\n\nexport const SARVAM_TTS_LANGUAGES = [\n 'hi-IN',\n 'bn-IN',\n 'kn-IN',\n 'ml-IN',\n 'mr-IN',\n 'od-IN',\n 'pa-IN',\n 'ta-IN',\n 'te-IN',\n 'en-IN',\n 'gu-IN',\n] as const;\n\nexport const SARVAM_STT_LANGUAGES = [...SARVAM_TTS_LANGUAGES, 'unknown'] as const;\n\n// Current TTS models. bulbul:v1 was deprecated and removed by Sarvam.\n// bulbul:v3-beta is a beta variant of bulbul:v3 that shares the same speaker catalog.\nexport const SARVAM_TTS_MODELS = ['bulbul:v2', 'bulbul:v3', 'bulbul:v3-beta'] as const;\n\n// Current STT models. saarika:v1, saarika:v2, and saarika:flash were deprecated.\n// saaras:v3 is a multi-mode model that supports transcribe/translate/verbatim/translit/codemix\n// via the `mode` option and is served from the same POST /speech-to-text endpoint.\nexport const SARVAM_STT_MODELS = ['saarika:v2.5', 'saaras:v3'] as const;\n\n// Operation modes supported by saaras:v3 only.\nexport const SARVAM_STT_MODES = ['transcribe', 'translate', 'verbatim', 'translit', 'codemix'] as const;\n\nexport type SarvamVoiceId = (typeof SARVAM_VOICES)[number];\n\nexport type SarvamTTSLanguage = (typeof SARVAM_TTS_LANGUAGES)[number];\nexport type SarvamSTTLanguage = (typeof SARVAM_STT_LANGUAGES)[number];\n\nexport type SarvamTTSModel = (typeof SARVAM_TTS_MODELS)[number];\nexport type SarvamSTTModel = (typeof SARVAM_STT_MODELS)[number];\nexport type SarvamSTTMode = (typeof SARVAM_STT_MODES)[number];\n","import { PassThrough } from 'node:stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport { SARVAM_VOICES } from './voices';\nimport type {\n SarvamTTSLanguage,\n SarvamSTTLanguage,\n SarvamSTTModel,\n SarvamTTSModel,\n SarvamVoiceId,\n SarvamSTTMode,\n} from './voices';\n\ninterface SarvamVoiceConfig {\n apiKey?: string;\n model?: SarvamTTSModel;\n language?: SarvamTTSLanguage;\n properties?: {\n /** Controls the speed of the audio. Supported by bulbul:v2 (0.3–3.0) and bulbul:v3 (0.5–2.0). */\n pace?: number;\n /** Sampling temperature. bulbul:v3 only. Range: 0.01–2.0. Default: 0.6. */\n temperature?: number;\n /** Pronunciation dictionary ID. bulbul:v3 only. */\n dict_id?: string;\n /** Controls the pitch of the audio. bulbul:v2 only. Range: -0.75–0.75. */\n pitch?: number;\n /** Controls the loudness of the audio. bulbul:v2 only. Range: 0.3–3.0. */\n loudness?: number;\n /** Enables normalization of English words and numeric entities. bulbul:v2 only. */\n enable_preprocessing?: boolean;\n /** Audio sample rate in Hz. */\n speech_sample_rate?: 8000 | 16000 | 22050 | 24000 | 32000 | 44100 | 48000;\n /** Output audio codec. */\n output_audio_codec?: 'mp3' | 'wav' | 'linear16' | 'mulaw' | 'alaw' | 'opus' | 'flac' | 'aac';\n };\n}\n\ninterface SarvamListenOptions {\n apiKey?: string;\n model?: SarvamSTTModel;\n languageCode?: SarvamSTTLanguage;\n filetype?: 'mp3' | 'wav';\n /** Operation mode for saaras:v3. Ignored by other models. */\n mode?: SarvamSTTMode;\n}\n\nconst defaultSpeechModel = {\n model: 'bulbul:v3' as const,\n apiKey: process.env.SARVAM_API_KEY,\n language: 'en-IN' as const,\n};\n\nconst defaultListeningModel = {\n model: 'saarika:v2.5' as const,\n apiKey: process.env.SARVAM_API_KEY,\n language_code: 'unknown' as const,\n};\n\nexport class SarvamVoice extends MastraVoice {\n private apiKey?: string;\n private model: SarvamTTSModel = 'bulbul:v3';\n private language: SarvamTTSLanguage = 'en-IN';\n private properties: Record<string, any> = {};\n speaker: SarvamVoiceId = 'shubh';\n private baseUrl = 'https://api.sarvam.ai';\n\n constructor({\n speechModel,\n speaker,\n listeningModel,\n }: {\n speechModel?: SarvamVoiceConfig;\n speaker?: SarvamVoiceId;\n listeningModel?: SarvamListenOptions;\n } = {}) {\n super({\n speechModel: {\n name: speechModel?.model ?? defaultSpeechModel.model,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.model ?? defaultListeningModel.model,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n },\n speaker,\n });\n\n this.apiKey = speechModel?.apiKey || listeningModel?.apiKey || defaultSpeechModel.apiKey;\n if (!this.apiKey) {\n throw new Error('SARVAM_API_KEY must be set');\n }\n this.model = speechModel?.model || defaultSpeechModel.model;\n this.language = speechModel?.language || defaultSpeechModel.language;\n this.properties = speechModel?.properties || {};\n // bulbul:v2 and bulbul:v3 have non-overlapping speaker catalogs, so the\n // default speaker depends on the selected TTS model.\n const defaultSpeaker: SarvamVoiceId = this.model === 'bulbul:v2' ? 'anushka' : 'shubh';\n this.speaker = speaker || defaultSpeaker;\n }\n\n private async makeRequest(endpoint: string, payload: any) {\n const headers = new Headers({\n 'api-subscription-key': this.apiKey!,\n 'Content-Type': 'application/json',\n });\n const response = await fetch(`${this.baseUrl}${endpoint}`, {\n method: 'POST',\n headers,\n body: JSON.stringify(payload),\n });\n if (!response.ok) {\n let errorMessage;\n try {\n const error = (await response.json()) as { message?: string };\n errorMessage = error.message || response.statusText;\n } catch {\n errorMessage = response.statusText;\n }\n throw new Error(`Sarvam AI API Error: ${errorMessage}`);\n }\n\n return response;\n }\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: { speaker?: SarvamVoiceId },\n ): Promise<NodeJS.ReadableStream> {\n const text = typeof input === 'string' ? input : await this.streamToString(input);\n\n const payload = {\n text,\n target_language_code: this.language,\n speaker: options?.speaker || this.speaker,\n model: this.model,\n ...this.properties,\n };\n\n const response = await this.makeRequest('/text-to-speech', payload);\n\n const { audios } = (await response.json()) as { audios: any };\n\n if (!audios || !audios.length) {\n throw new Error('No audio received from Sarvam AI');\n }\n\n // Convert base64 to buffer\n const audioBuffer = Buffer.from(audios[0], 'base64');\n\n // Create a PassThrough stream for the audio\n const stream = new PassThrough();\n stream.write(audioBuffer);\n stream.end();\n\n return stream;\n }\n\n async getSpeakers() {\n return SARVAM_VOICES.map(voice => ({\n voiceId: voice,\n }));\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n async listen(input: NodeJS.ReadableStream, options?: SarvamListenOptions): Promise<string> {\n // Collect audio data into buffer\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const form = new FormData();\n const mimeType = options?.filetype === 'mp3' ? 'audio/mpeg' : 'audio/wav';\n const blob = new Blob([audioBuffer], { type: mimeType });\n\n form.append('file', blob);\n form.append('model', options?.model || 'saarika:v2.5');\n form.append('language_code', options?.languageCode || 'unknown');\n // `mode` is only meaningful for saaras:v3 — Sarvam ignores it for saarika models.\n if (options?.mode) {\n form.append('mode', options.mode);\n }\n const requestOptions = {\n method: 'POST',\n headers: {\n 'api-subscription-key': this.apiKey!,\n },\n body: form,\n };\n\n try {\n const response = await fetch(`${this.baseUrl}/speech-to-text`, requestOptions);\n const result = (await response.json()) as any;\n return result.transcript;\n } catch (error) {\n console.error('Error during speech-to-text request:', error);\n throw error;\n }\n }\n}\n"]}