UNPKG

contextual-agent-sdk

Version:

SDK for building AI agents with seamless voice-text context switching

263 lines 9.18 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ModalityRouter = void 0;
/**
 * Turns raw user input (text or audio) into message objects and wraps
 * assistant replies in message objects for a target modality.
 *
 * Speech-to-text and text-to-speech are delegated to optional providers held
 * in `config.speechToText` / `config.textToSpeech`. When a provider is absent
 * and `config.useMockWhenUnavailable` is truthy (the constructor default),
 * built-in mock behaviour is used instead.
 */
class ModalityRouter {
    /** True while at least one `processMessage()` call is still in flight. */
    isProcessing = false;
    /** Count of `processMessage()` calls that have started but not yet settled. */
    activeRequests = 0;
    /** Router configuration: optional providers, default options and the mock switch. */
    config;
    /**
     * @param {object} [config] - Caller settings; they override the built-in
     *   default `useMockWhenUnavailable: true`.
     */
    constructor(config = {}) {
        this.config = {
            useMockWhenUnavailable: true,
            ...config
        };
    }
    /**
     * Classifies an input as voice or text.
     *
     * @param {*} input - Raw user input.
     * @returns {'voice'|'text'} `'voice'` when `isAudioInput(input)` holds, else `'text'`.
     */
    detectModality(input) {
        return this.isAudioInput(input) ? 'voice' : 'text';
    }
    /**
     * Converts an input into a user message using the given modality.
     *
     * @param {*} input - Audio payload for `'voice'`, a string for any other modality.
     * @param {string} modality - `'voice'` selects the voice pipeline; anything
     *   else is processed as text.
     * @param {string} sessionId - Forwarded to the modality-specific handler.
     * @returns {Promise<object>} The message built by the handler.
     */
    async processMessage(input, modality, sessionId) {
        // A counter (rather than a bare boolean) keeps `isProcessing` true until
        // the LAST overlapping call settles, not merely the first one.
        this.activeRequests += 1;
        this.isProcessing = true;
        try {
            if (modality === 'voice') {
                return await this.processVoiceMessage(input, sessionId);
            }
            return await this.processTextMessage(input, sessionId);
        }
        finally {
            this.activeRequests -= 1;
            this.isProcessing = this.activeRequests > 0;
        }
    }
    /**
     * Transcribes audio and wraps the transcript in a user message.
     *
     * @param {*} audioInput - Audio payload handed to the provider or the mock.
     * @param {string} sessionId - Accepted for interface symmetry; not read here.
     * @returns {Promise<object>} Voice message with voice and performance metadata.
     * @throws {Error} `Voice processing failed: …` for any failure, including a
     *   missing provider with mocks disabled. The original error is attached as `cause`.
     */
    async processVoiceMessage(audioInput, sessionId) {
        const startTime = Date.now();
        try {
            let transcriptionResult;
            if (this.config.speechToText) {
                transcriptionResult = await this.config.speechToText.transcribe(audioInput, this.config.defaultSTTOptions);
            }
            else if (this.config.useMockWhenUnavailable) {
                console.warn('No speech-to-text provider configured. Using mock transcription.');
                transcriptionResult = await this.mockSpeechToText(audioInput);
            }
            else {
                throw new Error('No speech-to-text provider configured and mocks are disabled');
            }
            const voiceMetadata = {
                // A missing or zero duration falls back to an estimate from the payload.
                duration: transcriptionResult.duration || this.getAudioDuration(audioInput),
                language: transcriptionResult.language || 'en-US',
                // `??` so that a provider-reported confidence of 0 is preserved.
                confidence: transcriptionResult.confidence ?? 0.95,
            };
            const elapsed = Date.now() - startTime;
            return {
                id: this.generateMessageId(),
                role: 'user',
                content: transcriptionResult.text,
                modality: 'voice',
                timestamp: new Date(),
                metadata: {
                    voice: voiceMetadata,
                    performance: {
                        processingTime: elapsed,
                        apiCalls: [
                            {
                                service: 'speech-to-text',
                                endpoint: this.config.speechToText ? 'custom-provider' : 'mock',
                                duration: elapsed,
                                status: 200
                            }
                        ]
                    }
                }
            };
        }
        catch (error) {
            // Keep the historical message text but retain the underlying error object.
            throw new Error(`Voice processing failed: ${error}`, { cause: error });
        }
    }
    /**
     * Wraps trimmed text in a user message.
     *
     * @param {string} textInput - Raw text; surrounding whitespace is removed.
     * @param {string} sessionId - Accepted for interface symmetry; not read here.
     * @returns {Promise<object>} Text message with fixed performance metadata.
     */
    async processTextMessage(textInput, sessionId) {
        return {
            id: this.generateMessageId(),
            role: 'user',
            content: textInput.trim(),
            modality: 'text',
            timestamp: new Date(),
            metadata: {
                performance: {
                    processingTime: 1,
                    apiCalls: []
                }
            }
        };
    }
    /**
     * Builds an assistant message for the requested modality.
     *
     * @param {string} content - Reply text.
     * @param {string} targetModality - `'voice'` adds voice metadata; anything
     *   else gets text-style performance metadata only.
     * @param {string} sessionId - Accepted for interface symmetry; not read here.
     * @returns {Promise<object>} The assistant message.
     */
    async prepareResponse(content, targetModality, sessionId) {
        const startTime = Date.now();
        const message = {
            id: this.generateMessageId(),
            role: 'assistant',
            content,
            modality: targetModality,
            timestamp: new Date()
        };
        if (targetModality === 'voice') {
            message.metadata = {
                voice: await this.prepareVoiceResponse(content),
                performance: {
                    processingTime: Date.now() - startTime,
                    apiCalls: []
                }
            };
        }
        else {
            message.metadata = {
                performance: {
                    processingTime: 1,
                    apiCalls: []
                }
            };
        }
        return message;
    }
    /**
     * Produces voice metadata for a reply, synthesizing through the configured
     * provider when there is one.
     *
     * @param {string} content - Reply text.
     * @returns {Promise<{language: string, confidence: number, duration: number}>}
     * @throws The provider's error when synthesis fails and mocks are disabled,
     *   or an `Error` when no provider is configured and mocks are disabled.
     */
    async prepareVoiceResponse(content) {
        const provider = this.config.textToSpeech;
        if (provider) {
            try {
                const audioResult = await provider.synthesize(content, this.config.defaultTTSOptions);
                return {
                    language: 'en-US',
                    confidence: 1.0,
                    duration: audioResult.duration || this.estimateVoiceDuration(content)
                };
            }
            catch (error) {
                console.error('Text-to-speech failed:', error);
                if (!this.config.useMockWhenUnavailable) {
                    throw error;
                }
                // Mocks are enabled: fall through to the estimated metadata below.
            }
        }
        if (this.config.useMockWhenUnavailable) {
            // Only claim "no provider" when that is actually the case; a provider
            // failure has already been reported by console.error above.
            if (!provider) {
                console.warn('No text-to-speech provider configured. Using mock audio generation.');
            }
            return {
                language: 'en-US',
                confidence: 1.0,
                duration: this.estimateVoiceDuration(content)
            };
        }
        throw new Error('No text-to-speech provider configured and mocks are disabled');
    }
    /** Replaces the speech-to-text provider. */
    setSpeechToTextProvider(provider) {
        this.config.speechToText = provider;
    }
    /** Replaces the text-to-speech provider. */
    setTextToSpeechProvider(provider) {
        this.config.textToSpeech = provider;
    }
    /** Shallow-merges `options` over the current default speech-to-text options. */
    setDefaultSTTOptions(options) {
        this.config.defaultSTTOptions = { ...this.config.defaultSTTOptions, ...options };
    }
    /** Shallow-merges `options` over the current default text-to-speech options. */
    setDefaultTTSOptions(options) {
        this.config.defaultTTSOptions = { ...this.config.defaultTTSOptions, ...options };
    }
    /**
     * Transcribes audio with per-call options layered over the defaults.
     *
     * @param {*} audioInput - Audio payload.
     * @param {object} [options] - Overrides for `config.defaultSTTOptions`.
     * @returns {Promise<object>} Provider result, or the mock result when no
     *   provider is configured and mocks are enabled.
     * @throws {Error} When no provider is configured and mocks are disabled.
     */
    async transcribeWithOptions(audioInput, options) {
        if (!this.config.speechToText) {
            if (this.config.useMockWhenUnavailable) {
                return this.mockSpeechToText(audioInput);
            }
            throw new Error('No speech-to-text provider configured');
        }
        const mergedOptions = { ...this.config.defaultSTTOptions, ...options };
        return this.config.speechToText.transcribe(audioInput, mergedOptions);
    }
    /**
     * Synthesizes speech with per-call options layered over the defaults.
     *
     * @param {string} text - Text to synthesize.
     * @param {object} [options] - Overrides for `config.defaultTTSOptions`.
     * @returns {Promise<object>} Whatever the provider's `synthesize` resolves to.
     * @throws {Error} When no text-to-speech provider is configured (no mock path here).
     */
    async synthesizeWithOptions(text, options) {
        if (!this.config.textToSpeech) {
            throw new Error('No text-to-speech provider configured');
        }
        const mergedOptions = { ...this.config.defaultTTSOptions, ...options };
        return this.config.textToSpeech.synthesize(text, mergedOptions);
    }
    /** @returns {boolean} Whether a speech-to-text provider is configured. */
    hasSpeechToText() {
        return !!this.config.speechToText;
    }
    /** @returns {boolean} Whether a text-to-speech provider is configured. */
    hasTextToSpeech() {
        return !!this.config.textToSpeech;
    }
    /**
     * Heuristically decides whether an input carries audio.
     *
     * @param {*} input - Candidate input.
     * @returns {boolean} True for objects tagged `type: 'audio'`, with an
     *   `audio/*` string `mimeType`, with a truthy `audioData`/`wav`/`mp3`/
     *   `webm`/`blob` property, or for Buffer / ArrayBuffer / Uint8Array values.
     */
    isAudioInput(input) {
        if (!input) {
            return false;
        }
        // `Buffer` only exists in Node-like runtimes; guard so other hosts do not throw.
        const isNodeBuffer = typeof Buffer !== 'undefined' && Buffer.isBuffer(input);
        const hasAudioMime = typeof input.mimeType === 'string' && input.mimeType.startsWith('audio/');
        return Boolean(input.type === 'audio' ||
            hasAudioMime ||
            input.audioData ||
            input.wav ||
            input.mp3 ||
            input.webm ||
            input.blob ||
            isNodeBuffer ||
            input instanceof ArrayBuffer ||
            input instanceof Uint8Array);
    }
    /**
     * Stand-in transcription used when no provider is configured.
     *
     * @param {*} audioInput - A string is echoed back as the transcript; any
     *   other value yields a canned sentence.
     * @returns {Promise<object>} Mock transcription result, resolved after a 100 ms delay.
     */
    async mockSpeechToText(audioInput) {
        await this.delay(100);
        if (typeof audioInput === 'string') {
            return {
                text: audioInput,
                confidence: 1.0,
                language: 'en-US'
            };
        }
        return {
            text: "I'd like to speak with customer service about my order.",
            confidence: 0.85,
            language: 'en-US',
            duration: this.getAudioDuration(audioInput)
        };
    }
    /**
     * Best-effort duration lookup for an audio payload.
     *
     * @param {*} audioInput - Audio payload.
     * @returns {number} `duration`, else `metadata.duration`, else an estimate
     *   of `bytes / 32000` (minimum 0.5) from `length`/`byteLength`, else 3.5.
     */
    getAudioDuration(audioInput) {
        if (audioInput?.duration) {
            return audioInput.duration;
        }
        if (audioInput?.metadata?.duration) {
            return audioInput.metadata.duration;
        }
        const bytes = audioInput?.length || audioInput?.byteLength;
        if (bytes) {
            return Math.max(0.5, bytes / 32000);
        }
        return 3.5;
    }
    /**
     * Estimates speaking time from the word count at 150 words per minute.
     *
     * @param {string} text - Text to be spoken.
     * @returns {number} Estimated seconds, never less than 1.
     */
    estimateVoiceDuration(text) {
        // Trim first so leading/trailing whitespace does not add phantom words.
        const trimmed = text.trim();
        const wordCount = trimmed === '' ? 0 : trimmed.split(/\s+/).length;
        const wordsPerSecond = 150 / 60;
        return Math.max(1, wordCount / wordsPerSecond);
    }
    /**
     * @returns {string} Identifier of the form `msg_<epoch ms>_<random base-36 suffix>`.
     */
    generateMessageId() {
        // slice(2, 11) is the non-deprecated equivalent of substr(2, 9).
        return `msg_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    }
    /** @returns {Promise<void>} Resolves after `ms` milliseconds. */
    delay(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }
    /** @returns {boolean} Whether any `processMessage()` call is still in flight. */
    isCurrentlyProcessing() {
        return this.isProcessing;
    }
    /**
     * @param {string} modality - Modality name.
     * @returns {boolean} Always true for `'text'`; for `'voice'`, true when a
     *   speech-to-text provider exists or mocks are enabled; false otherwise.
     */
    isModalitySupported(modality) {
        if (modality === 'text') {
            return true;
        }
        if (modality === 'voice') {
            return this.hasSpeechToText() || (this.config.useMockWhenUnavailable ?? false);
        }
        return false;
    }
    /**
     * @returns {object} Snapshot of supported modalities, configured providers,
     *   and whether a mock would be used for at least one missing provider.
     */
    getCapabilities() {
        return {
            voice: this.isModalitySupported('voice'),
            text: true,
            speechToText: this.hasSpeechToText(),
            textToSpeech: this.hasTextToSpeech(),
            usingMocks: (this.config.useMockWhenUnavailable ?? false) &&
                (!this.hasSpeechToText() || !this.hasTextToSpeech())
        };
    }
}
exports.ModalityRouter = ModalityRouter;
//# sourceMappingURL=ModalityRouter.js.map