UNPKG

contextual-agent-sdk

Version:

SDK for building AI agents with seamless voice-text context switching

98 lines 3.38 kB
import { Modality, Message } from '../types';

/**
 * Contract for a speech-to-text (STT) backend plugged into {@link ModalityRouter}.
 *
 * Implementations accept raw audio plus optional provider-specific options and
 * resolve with the transcription. Both the options bag and the result carry an
 * open index signature so providers can pass through vendor-specific fields.
 */
export interface SpeechToTextProvider {
    /**
     * Transcribes audio input into text.
     *
     * @param audioInput - Provider-specific audio payload (format is not
     *   constrained by this interface — assumed to be whatever the concrete
     *   provider accepts; confirm against the implementation).
     * @param options - Optional transcription parameters.
     * @returns The transcription text, with optional confidence, detected
     *   language, duration, and per-segment timing details.
     */
    transcribe(audioInput: any, options?: {
        language?: string;
        model?: string;
        temperature?: number;
        responseFormat?: 'json' | 'text' | 'verbose_json';
        prompt?: string;
        [key: string]: any;
    }): Promise<{
        text: string;
        confidence?: number;
        language?: string;
        duration?: number;
        segments?: Array<{
            start: number;
            end: number;
            text: string;
            confidence?: number;
        }>;
        [key: string]: any;
    }>;
}

/**
 * Contract for a text-to-speech (TTS) backend plugged into {@link ModalityRouter}.
 */
export interface TextToSpeechProvider {
    /**
     * Synthesizes speech audio from text.
     *
     * @param text - The text to vocalize.
     * @param options - Optional synthesis parameters (voice, prosody, output
     *   format). Open index signature allows vendor-specific extras.
     * @returns The synthesized audio data plus optional metadata about it.
     */
    synthesize(text: string, options?: {
        voice?: string;
        language?: string;
        speed?: number;
        pitch?: number;
        volume?: number;
        format?: 'mp3' | 'wav' | 'ogg' | 'flac';
        sampleRate?: number;
        [key: string]: any;
    }): Promise<{
        audioData: any;
        duration?: number;
        format?: string;
        sampleRate?: number;
        [key: string]: any;
    }>;
}

/**
 * Configuration for {@link ModalityRouter}.
 *
 * Both providers are optional; `useMockWhenUnavailable` controls whether the
 * router falls back to mock behavior when a provider is absent (see
 * {@link ModalityRouter.getCapabilities}, which reports `usingMocks`).
 */
export interface ModalityRouterConfig {
    /** STT backend used for incoming voice messages. */
    speechToText?: SpeechToTextProvider;
    /** TTS backend used for outgoing voice responses. */
    textToSpeech?: TextToSpeechProvider;
    /** When true, missing providers are substituted with mocks. */
    useMockWhenUnavailable?: boolean;
    /** Baseline options merged into every `transcribe` call. */
    defaultSTTOptions?: {
        language?: string;
        model?: string;
        temperature?: number;
        responseFormat?: 'json' | 'text' | 'verbose_json';
        prompt?: string;
        [key: string]: any;
    };
    /** Baseline options merged into every `synthesize` call. */
    defaultTTSOptions?: {
        voice?: string;
        language?: string;
        speed?: number;
        pitch?: number;
        volume?: number;
        format?: 'mp3' | 'wav' | 'ogg' | 'flac';
        [key: string]: any;
    };
}

/**
 * Routes messages between voice and text modalities, delegating to pluggable
 * STT/TTS providers and (per the config flag) mock fallbacks when providers
 * are unavailable.
 */
export declare class ModalityRouter {
    private isProcessing;
    private config;
    constructor(config?: ModalityRouterConfig);
    /** Infers the modality (voice vs. text) of a raw input payload. */
    detectModality(input: any): Modality;
    /** Normalizes an incoming payload of the given modality into a {@link Message}. */
    processMessage(input: any, modality: Modality, sessionId: string): Promise<Message>;
    private processVoiceMessage;
    private processTextMessage;
    /** Wraps response content as a {@link Message} in the target modality. */
    prepareResponse(content: string, targetModality: Modality, sessionId: string): Promise<Message>;
    private prepareVoiceResponse;
    /** Replaces the STT provider at runtime. */
    setSpeechToTextProvider(provider: SpeechToTextProvider): void;
    /** Replaces the TTS provider at runtime. */
    setTextToSpeechProvider(provider: TextToSpeechProvider): void;
    setDefaultSTTOptions(options: NonNullable<ModalityRouterConfig['defaultSTTOptions']>): void;
    setDefaultTTSOptions(options: NonNullable<ModalityRouterConfig['defaultTTSOptions']>): void;
    /**
     * Transcribes with explicit per-call options.
     *
     * Return type is `ReturnType<SpeechToTextProvider['transcribe']>`, which is
     * already a `Promise<{...}>` — previously this was wrapped in an extra
     * `Promise<...>`, declaring an impossible `Promise<Promise<{...}>>`.
     * The awaited result type is unchanged for callers.
     */
    transcribeWithOptions(audioInput: any, options?: Parameters<SpeechToTextProvider['transcribe']>[1]): ReturnType<SpeechToTextProvider['transcribe']>;
    /**
     * Synthesizes with explicit per-call options. Same double-`Promise` fix as
     * {@link ModalityRouter.transcribeWithOptions}.
     */
    synthesizeWithOptions(text: string, options?: Parameters<TextToSpeechProvider['synthesize']>[1]): ReturnType<TextToSpeechProvider['synthesize']>;
    /** True when a real STT provider is configured. */
    hasSpeechToText(): boolean;
    /** True when a real TTS provider is configured. */
    hasTextToSpeech(): boolean;
    private isAudioInput;
    private mockSpeechToText;
    private getAudioDuration;
    private estimateVoiceDuration;
    private generateMessageId;
    private delay;
    /** True while a message is being processed. */
    isCurrentlyProcessing(): boolean;
    isModalitySupported(modality: Modality): boolean;
    /** Snapshot of supported modalities and provider/mock availability. */
    getCapabilities(): {
        voice: boolean;
        text: boolean;
        speechToText: boolean;
        textToSpeech: boolean;
        usingMocks: boolean;
    };
}
//# sourceMappingURL=ModalityRouter.d.ts.map