// contextual-agent-sdk
// Version: (unspecified in source metadata)
// SDK for building AI agents with seamless voice-text context switching
// 98 lines • 3.38 kB
// TypeScript
import { Modality, Message } from '../types';
/**
 * Pluggable speech-to-text backend consumed by the ModalityRouter.
 *
 * The option and result objects carry open index signatures
 * (`[key: string]: any`) so provider-specific fields can pass through
 * without being typed here.
 */
export interface SpeechToTextProvider {
    /**
     * Transcribe an audio payload into text.
     *
     * @param audioInput - Audio to transcribe. Typed `any`; the accepted
     *   container (Buffer, Blob, file path, …) is provider-specific —
     *   NOTE(review): confirm against concrete implementations.
     * @param options - Optional transcription parameters: language hint,
     *   model id, sampling temperature, response format, and a priming
     *   prompt. Extra provider-specific keys are allowed.
     * @returns The transcript text plus optional metadata: overall
     *   confidence, detected language, audio duration, and per-segment
     *   timings (time units are provider-specific — presumably seconds;
     *   verify against the provider).
     */
    transcribe(audioInput: any, options?: {
        language?: string;
        model?: string;
        temperature?: number;
        responseFormat?: 'json' | 'text' | 'verbose_json';
        prompt?: string;
        [key: string]: any;
    }): Promise<{
        text: string;
        confidence?: number;
        language?: string;
        duration?: number;
        segments?: Array<{
            start: number;
            end: number;
            text: string;
            confidence?: number;
        }>;
        [key: string]: any;
    }>;
}
/**
 * Pluggable text-to-speech backend consumed by the ModalityRouter.
 *
 * Like SpeechToTextProvider, options and results have open index
 * signatures so provider-specific fields can pass through untyped.
 */
export interface TextToSpeechProvider {
    /**
     * Synthesize spoken audio from text.
     *
     * @param text - The text to speak.
     * @param options - Optional synthesis parameters: voice and language
     *   selection, prosody controls (speed, pitch, volume), output
     *   container format, and sample rate.
     * @returns The synthesized audio. `audioData` is typed `any`; its
     *   concrete container (Buffer, ArrayBuffer, …) is provider-specific
     *   — NOTE(review): confirm against concrete implementations.
     *   Optional metadata echoes duration, format, and sample rate.
     */
    synthesize(text: string, options?: {
        voice?: string;
        language?: string;
        speed?: number;
        pitch?: number;
        volume?: number;
        format?: 'mp3' | 'wav' | 'ogg' | 'flac';
        sampleRate?: number;
        [key: string]: any;
    }): Promise<{
        audioData: any;
        duration?: number;
        format?: string;
        sampleRate?: number;
        [key: string]: any;
    }>;
}
/**
 * Construction options for the ModalityRouter.
 *
 * Both providers are optional; `useMockWhenUnavailable` presumably lets
 * the router substitute built-in mocks when one is missing (see
 * `getCapabilities().usingMocks`) — confirm against the implementation.
 */
export interface ModalityRouterConfig {
    /** Speech-to-text backend; optional. */
    speechToText?: SpeechToTextProvider;
    /** Text-to-speech backend; optional. */
    textToSpeech?: TextToSpeechProvider;
    /** Allow mock STT/TTS behavior when a real provider is not configured. */
    useMockWhenUnavailable?: boolean;
    /**
     * Default options for transcription; mirrors the options accepted by
     * SpeechToTextProvider.transcribe. Presumably merged into each call —
     * verify merge semantics in the implementation.
     */
    defaultSTTOptions?: {
        language?: string;
        model?: string;
        temperature?: number;
        responseFormat?: 'json' | 'text' | 'verbose_json';
        prompt?: string;
        [key: string]: any;
    };
    /**
     * Default options for synthesis; mirrors the options accepted by
     * TextToSpeechProvider.synthesize.
     */
    defaultTTSOptions?: {
        voice?: string;
        language?: string;
        speed?: number;
        pitch?: number;
        volume?: number;
        format?: 'mp3' | 'wav' | 'ogg' | 'flac';
        // Added for parity with TextToSpeechProvider.synthesize, which
        // accepts sampleRate; previously it was only expressible through
        // the untyped index signature.
        sampleRate?: number;
        [key: string]: any;
    };
}
/**
 * Routes messages between voice and text modalities, delegating to the
 * configured speech-to-text / text-to-speech providers (or to internal
 * mocks when `useMockWhenUnavailable` permits).
 */
export declare class ModalityRouter {
    private isProcessing;
    private config;
    constructor(config?: ModalityRouterConfig);
    /** Infer the modality of a raw input (see `isAudioInput`). */
    detectModality(input: any): Modality;
    /** Convert raw input of the given modality into a Message for the session. */
    processMessage(input: any, modality: Modality, sessionId: string): Promise<Message>;
    private processVoiceMessage;
    private processTextMessage;
    /** Package response content as a Message in the target modality. */
    prepareResponse(content: string, targetModality: Modality, sessionId: string): Promise<Message>;
    private prepareVoiceResponse;
    setSpeechToTextProvider(provider: SpeechToTextProvider): void;
    setTextToSpeechProvider(provider: TextToSpeechProvider): void;
    setDefaultSTTOptions(options: NonNullable<ModalityRouterConfig['defaultSTTOptions']>): void;
    setDefaultTTSOptions(options: NonNullable<ModalityRouterConfig['defaultTTSOptions']>): void;
    /**
     * Transcribe audio with explicit per-call options.
     *
     * FIX: the return type was `Promise<ReturnType<…['transcribe']>>`,
     * i.e. `Promise<Promise<{ text: … }>>`, because `transcribe` already
     * returns a Promise. `Awaited` unwraps the inner type so the
     * declaration matches an implementation that forwards the provider's
     * result. Call sites that `await` the result are unaffected.
     */
    transcribeWithOptions(audioInput: any, options?: Parameters<SpeechToTextProvider['transcribe']>[1]): Promise<Awaited<ReturnType<SpeechToTextProvider['transcribe']>>>;
    /** Synthesize speech with explicit per-call options (same double-Promise fix as transcribeWithOptions). */
    synthesizeWithOptions(text: string, options?: Parameters<TextToSpeechProvider['synthesize']>[1]): Promise<Awaited<ReturnType<TextToSpeechProvider['synthesize']>>>;
    /** True when a real speech-to-text provider is configured. */
    hasSpeechToText(): boolean;
    /** True when a real text-to-speech provider is configured. */
    hasTextToSpeech(): boolean;
    private isAudioInput;
    private mockSpeechToText;
    private getAudioDuration;
    private estimateVoiceDuration;
    private generateMessageId;
    private delay;
    /** Presumably reports whether a message is currently being processed — confirm in implementation. */
    isCurrentlyProcessing(): boolean;
    isModalitySupported(modality: Modality): boolean;
    /** Summary of capabilities given the configured (or mocked) providers. */
    getCapabilities(): {
        voice: boolean;
        text: boolean;
        speechToText: boolean;
        textToSpeech: boolean;
        usingMocks: boolean;
    };
}
//# sourceMappingURL=ModalityRouter.d.ts.map