contextual-agent-sdk
Version:
SDK for building AI agents with seamless voice-text context switching
263 lines • 9.18 kB
JavaScript
// CommonJS module prologue: enable strict mode for the whole file.
"use strict";
// Flag the exports object with `__esModule: true` (the marker ES-module
// interop helpers look for — presumably emitted by a compiler; confirm).
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the export as undefined; the real value is assigned once the
// class has been defined further down in this file.
exports.ModalityRouter = void 0;
/**
 * Routes conversation messages between the 'text' and 'voice' modalities.
 *
 * Incoming input is turned into a user message (transcribing audio through
 * the configured speech-to-text provider), and outgoing content is turned
 * into an assistant message (optionally synthesizing audio metadata through
 * the configured text-to-speech provider). When a provider is missing and
 * `useMockWhenUnavailable` is enabled, mock implementations are used.
 */
class ModalityRouter {
    /** True while at least one `processMessage` call is still in flight. */
    isProcessing = false;
    /** Effective configuration (providers, default options, mock switch). */
    config;
    /** Number of `processMessage` calls that have started but not settled. */
    #inFlight = 0;

    /**
     * @param {object} [config] - Router configuration. Recognized keys used by
     *   this class: `speechToText`, `textToSpeech`, `defaultSTTOptions`,
     *   `defaultTTSOptions`, `useMockWhenUnavailable` (defaults to `true`).
     */
    constructor(config = {}) {
        this.config = {
            useMockWhenUnavailable: true,
            ...config
        };
    }

    /**
     * Classifies an input as 'voice' or 'text'.
     *
     * @param {*} input - Raw input to classify.
     * @returns {'voice'|'text'} 'voice' when `isAudioInput` accepts the input.
     */
    detectModality(input) {
        return this.isAudioInput(input) ? 'voice' : 'text';
    }

    /**
     * Converts raw input into a user message for the given modality.
     *
     * Calls may overlap; `isProcessing` stays true until the last in-flight
     * call settles (a plain boolean would be cleared by whichever call
     * finishes first).
     *
     * @param {*} input - Audio input for 'voice', a string for anything else.
     * @param {string} modality - 'voice' selects voice processing; every other
     *   value is handled as text.
     * @param {string} sessionId - Forwarded to the per-modality processor.
     * @returns {Promise<object>} The constructed user message.
     * @throws {Error} Whatever the per-modality processor throws.
     */
    async processMessage(input, modality, sessionId) {
        this.#inFlight += 1;
        this.isProcessing = true;
        try {
            // `await` inside the try so the finally block runs after completion.
            if (modality === 'voice') {
                return await this.processVoiceMessage(input, sessionId);
            }
            return await this.processTextMessage(input, sessionId);
        }
        finally {
            this.#inFlight -= 1;
            this.isProcessing = this.#inFlight > 0;
        }
    }

    /**
     * Transcribes audio input and wraps the transcript in a user message.
     *
     * @param {*} audioInput - Audio payload handed to the speech-to-text provider.
     * @param {string} sessionId - Accepted for interface symmetry; not used here.
     * @returns {Promise<object>} User message with voice and performance metadata.
     * @throws {Error} 'Voice processing failed: …' wrapping any failure; the
     *   original error is available as `cause`.
     */
    async processVoiceMessage(audioInput, sessionId) {
        const startTime = Date.now();
        try {
            let transcriptionResult;
            if (this.config.speechToText) {
                transcriptionResult = await this.config.speechToText.transcribe(audioInput, this.config.defaultSTTOptions);
            }
            else if (this.config.useMockWhenUnavailable) {
                console.warn('No speech-to-text provider configured. Using mock transcription.');
                transcriptionResult = await this.mockSpeechToText(audioInput);
            }
            else {
                throw new Error('No speech-to-text provider configured and mocks are disabled');
            }
            const voiceMetadata = {
                // A missing or zero duration falls back to a local estimate.
                duration: transcriptionResult.duration || this.getAudioDuration(audioInput),
                language: transcriptionResult.language || 'en-US',
                // `??` rather than `||`: a reported confidence of 0 is a real value.
                confidence: transcriptionResult.confidence ?? 0.95,
            };
            const elapsed = Date.now() - startTime;
            return {
                id: this.generateMessageId(),
                role: 'user',
                content: transcriptionResult.text,
                modality: 'voice',
                timestamp: new Date(),
                metadata: {
                    voice: voiceMetadata,
                    performance: {
                        processingTime: elapsed,
                        apiCalls: [
                            {
                                service: 'speech-to-text',
                                endpoint: this.config.speechToText ? 'custom-provider' : 'mock',
                                duration: elapsed,
                                status: 200
                            }
                        ]
                    }
                }
            };
        }
        catch (error) {
            // Keep the original error reachable for callers and stack traces.
            throw new Error(`Voice processing failed: ${error}`, { cause: error });
        }
    }

    /**
     * Wraps trimmed text input in a user message.
     *
     * @param {string} textInput - Text typed by the user.
     * @param {string} sessionId - Accepted for interface symmetry; not used here.
     * @returns {Promise<object>} User message with a fixed performance record.
     */
    async processTextMessage(textInput, sessionId) {
        return {
            id: this.generateMessageId(),
            role: 'user',
            content: textInput.trim(),
            modality: 'text',
            timestamp: new Date(),
            metadata: {
                performance: {
                    processingTime: 1,
                    apiCalls: []
                }
            }
        };
    }

    /**
     * Builds an assistant message for the requested output modality.
     *
     * @param {string} content - Assistant response text.
     * @param {string} targetModality - 'voice' adds voice metadata; any other
     *   value produces text-style metadata.
     * @param {string} sessionId - Accepted for interface symmetry; not used here.
     * @returns {Promise<object>} The assistant message.
     * @throws {Error} Propagates failures from `prepareVoiceResponse`.
     */
    async prepareResponse(content, targetModality, sessionId) {
        const startTime = Date.now();
        const message = {
            id: this.generateMessageId(),
            role: 'assistant',
            content,
            modality: targetModality,
            timestamp: new Date()
        };
        if (targetModality === 'voice') {
            message.metadata = {
                voice: await this.prepareVoiceResponse(content),
                performance: {
                    processingTime: Date.now() - startTime,
                    apiCalls: []
                }
            };
        }
        else {
            message.metadata = {
                performance: {
                    processingTime: 1,
                    apiCalls: []
                }
            };
        }
        return message;
    }

    /**
     * Produces voice metadata for a response, synthesizing audio when a
     * text-to-speech provider is configured.
     *
     * @param {string} content - Text to synthesize.
     * @returns {Promise<{language: string, confidence: number, duration: number}>}
     * @throws {Error} The provider's error when synthesis fails and mocks are
     *   disabled, or a configuration error when no provider exists and mocks
     *   are disabled.
     */
    async prepareVoiceResponse(content) {
        let providerFailed = false;
        if (this.config.textToSpeech) {
            try {
                const audioResult = await this.config.textToSpeech.synthesize(content, this.config.defaultTTSOptions);
                return {
                    language: 'en-US',
                    confidence: 1.0,
                    duration: audioResult.duration || this.estimateVoiceDuration(content)
                };
            }
            catch (error) {
                console.error('Text-to-speech failed:', error);
                if (!this.config.useMockWhenUnavailable) {
                    throw error;
                }
                providerFailed = true;
            }
        }
        if (this.config.useMockWhenUnavailable) {
            // Report the real reason for the fallback instead of always
            // claiming that no provider is configured.
            console.warn(providerFailed
                ? 'Text-to-speech provider failed. Using mock audio generation.'
                : 'No text-to-speech provider configured. Using mock audio generation.');
            return {
                language: 'en-US',
                confidence: 1.0,
                duration: this.estimateVoiceDuration(content)
            };
        }
        throw new Error('No text-to-speech provider configured and mocks are disabled');
    }

    /** @param {object} provider - Object exposing `transcribe(audio, options)`. */
    setSpeechToTextProvider(provider) {
        this.config.speechToText = provider;
    }

    /** @param {object} provider - Object exposing `synthesize(text, options)`. */
    setTextToSpeechProvider(provider) {
        this.config.textToSpeech = provider;
    }

    /** Merges `options` over the current default speech-to-text options. */
    setDefaultSTTOptions(options) {
        this.config.defaultSTTOptions = { ...this.config.defaultSTTOptions, ...options };
    }

    /** Merges `options` over the current default text-to-speech options. */
    setDefaultTTSOptions(options) {
        this.config.defaultTTSOptions = { ...this.config.defaultTTSOptions, ...options };
    }

    /**
     * Transcribes audio with per-call options layered over the defaults.
     *
     * @param {*} audioInput - Audio payload.
     * @param {object} [options] - Overrides for `defaultSTTOptions`.
     * @returns {Promise<object>} Provider (or mock) transcription result.
     * @throws {Error} When no provider is configured and mocks are disabled.
     */
    async transcribeWithOptions(audioInput, options) {
        if (!this.config.speechToText) {
            if (this.config.useMockWhenUnavailable) {
                return this.mockSpeechToText(audioInput);
            }
            throw new Error('No speech-to-text provider configured');
        }
        const mergedOptions = { ...this.config.defaultSTTOptions, ...options };
        return this.config.speechToText.transcribe(audioInput, mergedOptions);
    }

    /**
     * Synthesizes speech with per-call options layered over the defaults.
     * Unlike transcription, there is no mock fallback here.
     *
     * @param {string} text - Text to synthesize.
     * @param {object} [options] - Overrides for `defaultTTSOptions`.
     * @returns {Promise<object>} Provider synthesis result.
     * @throws {Error} When no text-to-speech provider is configured.
     */
    async synthesizeWithOptions(text, options) {
        if (!this.config.textToSpeech) {
            throw new Error('No text-to-speech provider configured');
        }
        const mergedOptions = { ...this.config.defaultTTSOptions, ...options };
        return this.config.textToSpeech.synthesize(text, mergedOptions);
    }

    /** @returns {boolean} Whether a speech-to-text provider is configured. */
    hasSpeechToText() {
        return !!this.config.speechToText;
    }

    /** @returns {boolean} Whether a text-to-speech provider is configured. */
    hasTextToSpeech() {
        return !!this.config.textToSpeech;
    }

    /**
     * Heuristically decides whether an input looks like audio.
     *
     * @param {*} input - Candidate input.
     * @returns {boolean} True for objects tagged as audio (via `type`, an
     *   `audio/*` `mimeType`, or a truthy `audioData`/`wav`/`mp3`/`webm`/`blob`
     *   property) and for Buffer, ArrayBuffer and Uint8Array instances.
     */
    isAudioInput(input) {
        if (!input) {
            return false;
        }
        // Only call startsWith on real strings; other mimeType values are ignored.
        const hasAudioMime = typeof input.mimeType === 'string' && input.mimeType.startsWith('audio/');
        // Buffer is a Node.js global; guard so other runtimes do not throw.
        const isNodeBuffer = typeof Buffer !== 'undefined' && Buffer.isBuffer(input);
        return Boolean(input.type === 'audio' ||
            hasAudioMime ||
            input.audioData ||
            input.wav ||
            input.mp3 ||
            input.webm ||
            input.blob ||
            isNodeBuffer ||
            input instanceof ArrayBuffer ||
            input instanceof Uint8Array);
    }

    /**
     * Stand-in transcription used when no provider is configured.
     * Waits 100 ms, then echoes string input or returns a canned sentence.
     *
     * @param {*} audioInput - Audio payload, or a string to echo back.
     * @returns {Promise<object>} Mock transcription result.
     */
    async mockSpeechToText(audioInput) {
        await this.delay(100);
        if (typeof audioInput === 'string') {
            return {
                text: audioInput,
                confidence: 1.0,
                language: 'en-US'
            };
        }
        return {
            text: "I'd like to speak with customer service about my order.",
            confidence: 0.85,
            language: 'en-US',
            duration: this.getAudioDuration(audioInput)
        };
    }

    /**
     * Best-effort duration for an audio input.
     *
     * Preference order: `duration`, `metadata.duration`, a size-based estimate
     * from `length`/`byteLength`, then a fixed 3.5 default.
     *
     * @param {*} audioInput - Audio payload.
     * @returns {number} Duration (presumably seconds — confirm against callers).
     */
    getAudioDuration(audioInput) {
        if (audioInput?.duration) {
            return audioInput.duration;
        }
        if (audioInput?.metadata?.duration) {
            return audioInput.metadata.duration;
        }
        const bytes = audioInput?.length || audioInput?.byteLength;
        if (bytes) {
            // 32000 bytes are treated as one unit of duration (presumably one
            // second of 16 kHz 16-bit mono PCM — confirm), floored at 0.5.
            return Math.max(0.5, bytes / 32000);
        }
        return 3.5;
    }

    /**
     * Estimates spoken duration of text at 150 words per minute, minimum 1.
     *
     * @param {string} text - Text to be spoken.
     * @returns {number} Estimated duration in seconds.
     */
    estimateVoiceDuration(text) {
        // Trim and drop empty tokens so surrounding whitespace is not counted.
        const wordCount = text.trim().split(/\s+/).filter(Boolean).length;
        const wordsPerSecond = 150 / 60;
        return Math.max(1, wordCount / wordsPerSecond);
    }

    /**
     * @returns {string} Identifier of the form `msg_<epoch ms>_<random base36>`.
     *   Not cryptographically secure.
     */
    generateMessageId() {
        // slice(2, 11) selects the same characters as the deprecated substr(2, 9).
        return `msg_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    }

    /**
     * @param {number} ms - Milliseconds to wait.
     * @returns {Promise<void>} Resolves after the timeout fires.
     */
    delay(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }

    /** @returns {boolean} Whether any `processMessage` call is in flight. */
    isCurrentlyProcessing() {
        return this.isProcessing;
    }

    /**
     * @param {string} modality - Modality name to check.
     * @returns {boolean} True for 'text'; for 'voice', true when a
     *   speech-to-text provider exists or mocks are enabled; false otherwise.
     */
    isModalitySupported(modality) {
        if (modality === 'text') {
            return true;
        }
        if (modality === 'voice') {
            return this.hasSpeechToText() || (this.config.useMockWhenUnavailable ?? false);
        }
        return false;
    }

    /**
     * @returns {{voice: boolean, text: boolean, speechToText: boolean,
     *   textToSpeech: boolean, usingMocks: boolean}} Capability summary;
     *   `usingMocks` is true when mocks are enabled and at least one provider
     *   is missing.
     */
    getCapabilities() {
        return {
            voice: this.isModalitySupported('voice'),
            text: true,
            speechToText: this.hasSpeechToText(),
            textToSpeech: this.hasTextToSpeech(),
            usingMocks: (this.config.useMockWhenUnavailable ?? false) && (!this.hasSpeechToText() || !this.hasTextToSpeech())
        };
    }
}
// Publish the class as this module's `ModalityRouter` export.
exports.ModalityRouter = ModalityRouter;
//# sourceMappingURL=ModalityRouter.js.map