UNPKG

@mastra/core

Version:

Mastra is a framework for building AI-powered applications and agents with a modern TypeScript stack.

443 lines (438 loc) • 13.7 kB
import { MastraBase } from './chunk-WENZPAHS.js';
import { generateSpeech, transcribe } from './chunk-QBZCTB6N.js';
import { MastraError } from './chunk-FJEVLHJT.js';
import { PassThrough } from 'stream';

// src/voice/voice.ts

/**
 * Base class for voice providers.
 *
 * Stores the listening/speech model configuration and supplies default
 * implementations of the realtime API (connect, send, answer, on, off, ...).
 * Most defaults only emit a debug log and return an empty result;
 * `addInstructions` and `addTools` are silent no-ops. Subclasses override
 * the parts they support. `speak` and `listen` are not defined here.
 */
var MastraVoice = class extends MastraBase {
  listeningModel;
  speechModel;
  speaker;
  realtimeConfig;

  /**
   * @param config Optional provider configuration
   * @param config.listeningModel Speech-to-text model configuration
   * @param config.speechModel Text-to-speech model configuration
   * @param config.speaker Default speaker/voice identifier
   * @param config.realtimeConfig Realtime configuration
   * @param config.name Component name forwarded to MastraBase
   */
  constructor({ listeningModel, speechModel, speaker, realtimeConfig, name } = {}) {
    super({ component: "VOICE", name });
    this.listeningModel = listeningModel;
    this.speechModel = speechModel;
    this.speaker = speaker;
    this.realtimeConfig = realtimeConfig;
  }

  /**
   * Custom serialization for tracing/observability spans.
   * Excludes `apiKey` from listeningModel / speechModel / realtimeConfig
   * and any provider-specific state held by subclasses. Subclasses that
   * need to expose additional non-sensitive fields can override.
   * @returns Plain object holding only the component, name, speaker and model names
   */
  serializeForSpan() {
    return {
      component: "VOICE",
      name: this.name,
      speaker: this.speaker,
      listeningModel: this.listeningModel ? { name: this.listeningModel.name } : void 0,
      speechModel: this.speechModel ? { name: this.speechModel.name } : void 0,
      realtimeModel: this.realtimeConfig?.model
    };
  }

  /**
   * Update the provider configuration. The base implementation only logs.
   * @param _options Provider-specific options (unused here)
   */
  updateConfig(_options) {
    this.logger.debug("updateConfig not implemented by this voice provider");
  }

  /**
   * Initializes a WebSocket or WebRTC connection for real-time communication
   * @param _options Provider-specific connection options (unused here)
   * @returns Promise that resolves when the connection is established
   */
  connect(_options) {
    this.logger.debug("connect not implemented by this voice provider");
    return Promise.resolve();
  }

  /**
   * Relay audio data to the voice provider for real-time processing
   * @param _audioData Audio data to relay (unused here)
   * @returns Promise that resolves immediately in the base implementation
   */
  send(_audioData) {
    // The message names the method, matching every other default in this class.
    this.logger.debug("send not implemented by this voice provider");
    return Promise.resolve();
  }

  /**
   * Trigger voice providers to respond
   * @param _options Provider-specific options (unused here)
   * @returns Promise that resolves immediately in the base implementation
   */
  answer(_options) {
    this.logger.debug("answer not implemented by this voice provider");
    return Promise.resolve();
  }

  /**
   * Equip the voice provider with instructions.
   * The base implementation is a silent no-op.
   * @param _instructions Instructions to add
   */
  addInstructions(_instructions) {
  }

  /**
   * Equip the voice provider with tools.
   * The base implementation is a silent no-op.
   * @param _tools Array of tools to add
   */
  addTools(_tools) {
  }

  /**
   * Disconnect from the WebSocket or WebRTC connection
   */
  close() {
    this.logger.debug("close not implemented by this voice provider");
  }

  /**
   * Register an event listener
   * @param _event Event name (e.g., 'speaking', 'writing', 'error')
   * @param _callback Callback function that receives event data
   */
  on(_event, _callback) {
    this.logger.debug("on not implemented by this voice provider");
  }

  /**
   * Remove an event listener
   * @param _event Event name (e.g., 'speaking', 'writing', 'error')
   * @param _callback Callback function to remove
   */
  off(_event, _callback) {
    this.logger.debug("off not implemented by this voice provider");
  }

  /**
   * Get available speakers/voices
   * @returns Promise resolving to an empty array in the base implementation
   */
  getSpeakers() {
    this.logger.debug("getSpeakers not implemented by this voice provider");
    return Promise.resolve([]);
  }

  /**
   * Report whether this provider can listen (speech-to-text).
   * @returns Promise resolving to `{ enabled: false }` in the base implementation
   */
  getListener() {
    this.logger.debug("getListener not implemented by this voice provider");
    return Promise.resolve({ enabled: false });
  }
};

/**
 * Voice provider that wraps a speech model and synthesizes audio through
 * `generateSpeech`. It does not support transcription.
 */
var AISDKSpeech = class extends MastraVoice {
  model;
  defaultVoice;

  /**
   * @param model Speech model handed to `generateSpeech`
   * @param options Optional settings; `options.voice` becomes the default voice
   */
  constructor(model, options) {
    super({ name: "ai-sdk-speech" });
    this.model = model;
    this.defaultVoice = options?.voice;
  }

  /**
   * Convert text to speech.
   * @param input Text, or a stream whose chunks are concatenated and decoded as UTF-8 text
   * @param options Optional speaker, language, providerOptions, abortSignal and headers
   * @returns A PassThrough stream that already contains the full audio and is ended
   */
  async speak(input, options) {
    const text = typeof input === "string" ? input : await this.streamToText(input);
    const result = await generateSpeech({
      model: this.model,
      text,
      // Map speaker to AI SDK's voice parameter
      voice: options?.speaker || this.defaultVoice,
      language: options?.language,
      providerOptions: options?.providerOptions,
      abortSignal: options?.abortSignal,
      headers: options?.headers
    });
    const stream = new PassThrough();
    stream.end(Buffer.from(result.audio.uint8Array));
    return stream;
  }

  /**
   * Not supported by speech models.
   * @throws Error always
   */
  async listen() {
    throw new Error("AI SDK speech models do not support transcription. Use AISDKTranscription instead.");
  }

  /**
   * @returns Empty array; this wrapper does not enumerate voices
   */
  async getSpeakers() {
    return [];
  }

  /**
   * @returns `{ enabled: false }`, since this provider cannot listen
   */
  async getListener() {
    return { enabled: false };
  }

  /**
   * Drain an (async) iterable of chunks into one UTF-8 string.
   * @param stream Iterable yielding Buffers or values accepted by `Buffer.from`
   * @returns The concatenated chunks decoded as UTF-8
   */
  async streamToText(stream) {
    const chunks = [];
    for await (const chunk of stream) {
      chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    }
    return Buffer.concat(chunks).toString("utf-8");
  }
};

// src/voice/aisdk/transcription.ts

/**
 * Voice provider that wraps a transcription model and converts audio to text
 * through `transcribe`. It does not support text-to-speech.
 */
var AISDKTranscription = class extends MastraVoice {
  model;

  /**
   * @param model Transcription model handed to `transcribe`
   */
  constructor(model) {
    super({ name: "ai-sdk-transcription" });
    this.model = model;
  }

  /**
   * Not supported by transcription models.
   * @throws Error always
   */
  async speak() {
    throw new Error("AI SDK transcription models do not support text-to-speech. Use AISDKSpeech instead.");
  }

  /**
   * @returns Empty array; transcription has no voices
   */
  async getSpeakers() {
    return [];
  }

  /**
   * @returns `{ enabled: true }`, since this provider can listen
   */
  async getListener() {
    return { enabled: true };
  }

  /**
   * Transcribe audio to text
   * For enhanced metadata (segments, language, duration), use AI SDK's transcribe() directly
   * @param audioStream Buffer, Uint8Array, base64 string, or iterable of audio chunks
   * @param options Optional providerOptions, abortSignal and headers
   * @returns The `text` field of the transcription result
   */
  async listen(audioStream, options) {
    const audioBuffer = await this.convertToBuffer(audioStream);
    const result = await transcribe({
      model: this.model,
      audio: audioBuffer,
      providerOptions: options?.providerOptions,
      abortSignal: options?.abortSignal,
      headers: options?.headers
    });
    return result.text;
  }

  /**
   * Normalize the supported audio inputs into a single Buffer.
   * @param audio Buffer (returned as is), Uint8Array, base64-encoded string,
   *   or an (async) iterable of chunks
   * @returns Buffer holding the complete audio
   */
  async convertToBuffer(audio) {
    if (Buffer.isBuffer(audio)) return audio;
    if (audio instanceof Uint8Array) return Buffer.from(audio);
    // Strings are decoded as base64, not treated as paths or raw text.
    if (typeof audio === "string") return Buffer.from(audio, "base64");
    const chunks = [];
    for await (const chunk of audio) {
      chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    }
    return Buffer.concat(chunks);
  }
};

// src/voice/composite-voice.ts
var supportedSpecificationVersions = ["v2", "v3"];

/**
 * Shared check behind both model type guards: a non-null object with a truthy
 * `modelId` and a supported `specificationVersion`.
 * @param obj Candidate value
 * @returns true when `obj` has that shape, false otherwise
 */
function isSupportedModel(obj) {
  return Boolean(obj) && typeof obj === "object" && Boolean(obj.modelId) && supportedSpecificationVersions.includes(obj.specificationVersion);
}

/**
 * Type guard for a model passed as CompositeVoice `input`.
 * NOTE: this check is identical to isSpeechModel; the two kinds are told
 * apart only by the slot (`input` / `output`) the caller used.
 * @param obj Candidate value
 * @returns true when `obj` looks like a supported model
 */
function isTranscriptionModel(obj) {
  return isSupportedModel(obj);
}

/**
 * Type guard for a model passed as CompositeVoice `output`.
 * NOTE: this check is identical to isTranscriptionModel; the two kinds are
 * told apart only by the slot (`input` / `output`) the caller used.
 * @param obj Candidate value
 * @returns true when `obj` looks like a supported model
 */
function isSpeechModel(obj) {
  return isSupportedModel(obj);
}

/**
 * Build the user-category MastraError used by every voice failure in this module.
 * @param id Error identifier
 * @param text Human-readable message
 * @returns A new MastraError in the MASTRA_VOICE domain
 */
function createVoiceUserError(id, text) {
  return new MastraError({
    id,
    text,
    domain: "MASTRA_VOICE" /* MASTRA_VOICE */,
    category: "USER" /* USER */
  });
}

/**
 * Voice provider that delegates to up to three underlying providers: one for
 * listening, one for speaking and one for realtime. When a realtime provider
 * is configured it takes precedence for speak/listen/getSpeakers/getListener.
 */
var CompositeVoice = class extends MastraVoice {
  speakProvider;
  listenProvider;
  realtimeProvider;

  /**
   * @param config Optional configuration; omitting it yields a voice with no providers
   * @param config.input Listen provider, or a model that is wrapped in AISDKTranscription
   * @param config.output Speak provider, or a model that is wrapped in AISDKSpeech
   * @param config.realtime Realtime provider
   */
  constructor({ input, output, realtime } = {}) {
    super();
    if (input) {
      this.listenProvider = isTranscriptionModel(input) ? new AISDKTranscription(input) : input;
    }
    if (output) {
      this.speakProvider = isSpeechModel(output) ? new AISDKSpeech(output) : output;
    }
    this.realtimeProvider = realtime;
  }

  /**
   * Convert text to speech using the configured provider
   * @param input Text or text stream to convert to speech
   * @param options Speech options including speaker and provider-specific options
   * @returns Audio stream or void if in realtime mode
   * @throws MastraError when neither a realtime nor a speak provider is configured
   */
  async speak(input, options) {
    if (this.realtimeProvider) {
      return this.realtimeProvider.speak(input, options);
    } else if (this.speakProvider) {
      return this.speakProvider.speak(input, options);
    }
    throw createVoiceUserError("VOICE_COMPOSITE_NO_SPEAK_PROVIDER", "No speak provider or realtime provider configured");
  }

  /**
   * Convert speech to text using the configured provider
   * @param audioStream Audio to transcribe
   * @param options Provider-specific listen options
   * @returns Whatever the selected provider's `listen` resolves to
   * @throws MastraError when neither a realtime nor a listen provider is configured
   */
  async listen(audioStream, options) {
    if (this.realtimeProvider) {
      return await this.realtimeProvider.listen(audioStream, options);
    } else if (this.listenProvider) {
      return await this.listenProvider.listen(audioStream, options);
    }
    throw createVoiceUserError("VOICE_COMPOSITE_NO_LISTEN_PROVIDER", "No listen provider or realtime provider configured");
  }

  /**
   * Get available speakers/voices from the realtime or speak provider
   * @returns Whatever the selected provider's `getSpeakers` resolves to
   * @throws MastraError when neither a realtime nor a speak provider is configured
   */
  async getSpeakers() {
    if (this.realtimeProvider) {
      return this.realtimeProvider.getSpeakers();
    } else if (this.speakProvider) {
      return this.speakProvider.getSpeakers();
    }
    throw createVoiceUserError("VOICE_COMPOSITE_NO_SPEAKERS_PROVIDER", "No speak provider or realtime provider configured");
  }

  /**
   * Get listener status from the realtime or listen provider
   * @returns Whatever the selected provider's `getListener` resolves to
   * @throws MastraError when neither a realtime nor a listen provider is configured
   */
  async getListener() {
    if (this.realtimeProvider) {
      return this.realtimeProvider.getListener();
    } else if (this.listenProvider) {
      return this.listenProvider.getListener();
    }
    throw createVoiceUserError("VOICE_COMPOSITE_NO_LISTENER_PROVIDER", "No listener provider or realtime provider configured");
  }

  /**
   * Forward a configuration update to the realtime provider.
   * Does nothing when no realtime provider is configured.
   * @param options Provider-specific options
   */
  updateConfig(options) {
    if (!this.realtimeProvider) {
      return;
    }
    this.realtimeProvider.updateConfig(options);
  }

  /**
   * Initializes a WebSocket or WebRTC connection for real-time communication
   * @param options Provider-specific connection options
   * @returns Promise that resolves when the connection is established
   * @throws MastraError when no realtime provider is configured
   */
  connect(options) {
    if (!this.realtimeProvider) {
      throw createVoiceUserError("VOICE_COMPOSITE_NO_REALTIME_PROVIDER_CONNECT", "No realtime provider configured");
    }
    return this.realtimeProvider.connect(options);
  }

  /**
   * Relay audio data to the voice provider for real-time processing
   * @param audioData Audio data to send
   * @returns The realtime provider's `send` result
   * @throws MastraError when no realtime provider is configured
   */
  send(audioData) {
    if (!this.realtimeProvider) {
      throw createVoiceUserError("VOICE_COMPOSITE_NO_REALTIME_PROVIDER_SEND", "No realtime provider configured");
    }
    return this.realtimeProvider.send(audioData);
  }

  /**
   * Trigger voice providers to respond
   * @param options Provider-specific options
   * @returns The realtime provider's `answer` result
   * @throws MastraError when no realtime provider is configured
   */
  answer(options) {
    if (!this.realtimeProvider) {
      throw createVoiceUserError("VOICE_COMPOSITE_NO_REALTIME_PROVIDER_ANSWER", "No realtime provider configured");
    }
    return this.realtimeProvider.answer(options);
  }

  /**
   * Equip the voice provider with instructions.
   * Does nothing when no realtime provider is configured.
   * @param instructions Instructions to add
   */
  addInstructions(instructions) {
    if (!this.realtimeProvider) {
      return;
    }
    this.realtimeProvider.addInstructions(instructions);
  }

  /**
   * Equip the voice provider with tools.
   * Does nothing when no realtime provider is configured.
   * @param tools Array of tools to add
   */
  addTools(tools) {
    if (!this.realtimeProvider) {
      return;
    }
    this.realtimeProvider.addTools(tools);
  }

  /**
   * Disconnect from the WebSocket or WebRTC connection
   * @throws MastraError when no realtime provider is configured
   */
  close() {
    if (!this.realtimeProvider) {
      throw createVoiceUserError("VOICE_COMPOSITE_NO_REALTIME_PROVIDER_CLOSE", "No realtime provider configured");
    }
    this.realtimeProvider.close();
  }

  /**
   * Register an event listener
   * @param event Event name (e.g., 'speaking', 'writing', 'error')
   * @param callback Callback function that receives event data
   * @throws MastraError when no realtime provider is configured
   */
  on(event, callback) {
    if (!this.realtimeProvider) {
      throw createVoiceUserError("VOICE_COMPOSITE_NO_REALTIME_PROVIDER_ON", "No realtime provider configured");
    }
    this.realtimeProvider.on(event, callback);
  }

  /**
   * Remove an event listener
   * @param event Event name (e.g., 'speaking', 'writing', 'error')
   * @param callback Callback function to remove
   * @throws MastraError when no realtime provider is configured
   */
  off(event, callback) {
    if (!this.realtimeProvider) {
      throw createVoiceUserError("VOICE_COMPOSITE_NO_REALTIME_PROVIDER_OFF", "No realtime provider configured");
    }
    this.realtimeProvider.off(event, callback);
  }
};

// src/voice/default-voice.ts

/**
 * Placeholder voice whose speak/listen/getSpeakers/getListener always reject
 * with a MastraError stating that no voice provider is configured.
 */
var DefaultVoice = class extends MastraVoice {
  // Kept explicit so constructor arguments are never forwarded to MastraVoice.
  constructor() {
    super();
  }

  /**
   * @param _input Ignored
   * @throws MastraError always
   */
  async speak(_input) {
    throw createVoiceUserError("VOICE_DEFAULT_NO_SPEAK_PROVIDER", "No voice provider configured");
  }

  /**
   * @param _input Ignored
   * @throws MastraError always
   */
  async listen(_input) {
    throw createVoiceUserError("VOICE_DEFAULT_NO_LISTEN_PROVIDER", "No voice provider configured");
  }

  /**
   * @throws MastraError always
   */
  async getSpeakers() {
    throw createVoiceUserError("VOICE_DEFAULT_NO_SPEAKERS_PROVIDER", "No voice provider configured");
  }

  /**
   * @throws MastraError always
   */
  async getListener() {
    throw createVoiceUserError("VOICE_DEFAULT_NO_LISTENER_PROVIDER", "No voice provider configured");
  }
};

export { AISDKSpeech, AISDKTranscription, CompositeVoice, DefaultVoice, MastraVoice };
//# sourceMappingURL=chunk-C6M3GAQR.js.map