UNPKG

@pompeii-labs/audio

Version:
197 lines (186 loc) 5.49 kB
import { A as AudioFormat } from './index-o4B-ThOL.js';
import { DeepgramClient, LiveSchema } from '@deepgram/sdk';
import { HumeClient } from 'hume';
import OpenAI from 'openai';

/**
 * One entry of the optional `turns` list on {@link MagmaFlowSTTOutput}.
 *
 * Only `text` and `confidence` are required. The meaning and units of the
 * remaining fields are not stated in this declaration.
 * NOTE(review): `speaker` is presumably a diarization index and `durationMs`
 * presumably equals `end - start` in milliseconds; confirm against the
 * implementation.
 */
type Turn = {
    speaker?: number;
    text: string;
    confidence: number;
    start?: Date;
    end?: Date;
    durationMs?: number;
};

/** Payload passed to `MagmaFlowSpeechToText.onOutput` and to `MagmaFlowArgs.onTranscription`. */
type MagmaFlowSTTOutput = {
    text: string;
    turns?: Turn[];
};

/**
 * Abstract base for speech-to-text backends.
 *
 * Subclasses must implement `input`, `flush` and `kill`. `onSpeechDetected`
 * and `onOutput` are declared as concrete methods on the base class.
 * NOTE(review): they look like hooks that the owner of the instance
 * reassigns; confirm against the implementation.
 */
declare abstract class MagmaFlowSpeechToText {
    constructor();
    /** Accepts a buffer of audio. */
    abstract input(audio: Buffer): void;
    abstract flush(): void;
    abstract kill(): void;
    onSpeechDetected(): void;
    onOutput(output: MagmaFlowSTTOutput): void;
}

/**
 * Abstract base for text-to-speech backends.
 *
 * Subclasses must implement `input` and `kill`. `onOutput` is declared as a
 * concrete method taking an audio buffer (or `null`) plus the request id.
 *
 * Every concrete subclass declared in this file widens the `text` parameter
 * of `input` to `string | null`; the abstract signature here accepts only
 * `string`.
 */
declare abstract class MagmaFlowTextToSpeech {
    constructor();
    abstract input(text: string, requestId: string): void;
    abstract kill(): void;
    onOutput(audio: Buffer | null, requestId: string): void;
}

/**
 * Optional tuning values for {@link MagmaFlow}.
 * Defaults are not visible in this declaration.
 */
type MagmaFlowConfig = {
    pauseDurationMs?: number;
    sentenceChunkLength?: number;
};

/**
 * Constructor arguments for {@link MagmaFlow}: one STT backend, one TTS
 * backend, the input and output audio formats, three callbacks, and an
 * optional config.
 */
type MagmaFlowArgs = {
    stt: MagmaFlowSpeechToText;
    tts: MagmaFlowTextToSpeech;
    inputFormat: AudioFormat;
    outputFormat: AudioFormat;
    onSpeechDetected: () => void;
    onTranscription: (transcription: MagmaFlowSTTOutput) => void;
    onAudioOutput: (audio: Buffer) => void;
    config?: MagmaFlowConfig;
};

/**
 * Pairs a speech-to-text backend with a text-to-speech backend.
 *
 * Public surface: `inputAudio`, `inputText` (which accepts `null`),
 * `interruptTTS` and `kill`. All state is private and untyped in this
 * declaration.
 */
declare class MagmaFlow {
    constructor(args: MagmaFlowArgs);

    private stt;
    private tts;
    private inputFormat;
    private outputFormat;
    private onAudioOutput;
    private textBuffer;
    private textQueue;
    private generatingAudio;
    private currentRequestId;
    private audioBuffer;
    private lastChunk;
    private config;

    inputAudio(audio: Buffer): void;
    inputText(text: string | null): void;
    private generateAudio;
    private sendAudio;
    interruptTTS(): void;
    kill(): void;
}

/** Model identifiers accepted by {@link DeepgramSTTArgs}. */
declare enum DeepgramModel {
    NOVA_3 = "nova-3"
}

/** Language tags exported alongside the Deepgram helpers. */
declare enum DeepgramLanguage {
    EN_US = "en-US"
}

/**
 * Shape of a single word record.
 * NOTE(review): presumably mirrors the word objects in Deepgram transcription
 * results; confirm against the Deepgram SDK types.
 */
type DeepgramWord = {
    word: string;
    start: number;
    end: number;
    confidence: number;
    language: string;
    punctuated_word: string;
    speaker?: number;
};

/**
 * `LiveSchema` from `@deepgram/sdk` with the listed keys removed.
 * NOTE(review): presumably these keys are set by `DeepgramSTT` itself;
 * confirm against the implementation.
 */
type DeepgramConfig = Omit<
    LiveSchema,
    'model' | 'vad_events' | 'interim_results' | 'encoding' | 'sample_rate' | 'channels' | 'endpointing'
>;

/** Constructor arguments for {@link DeepgramSTT}. Only `model` is required. */
type DeepgramSTTArgs = {
    client?: DeepgramClient;
    model: DeepgramModel;
    config?: DeepgramConfig;
};

/** Speech-to-text backend that extends {@link MagmaFlowSpeechToText}. */
declare class DeepgramSTT extends MagmaFlowSpeechToText {
    constructor(args: DeepgramSTTArgs);

    private client;
    private connection;
    private config;
    private turnBuffer;
    private utteranceEnded;

    private setup;
    input(audio: Buffer): void;
    flush(): void;
    kill(): void;
    private handleTranscriptionEvent;
    private handleUtteranceEnd;
    private sendOutput;
    private onOpen;
    private keepAlive;
    private computeTurns;
}

/** Constructor arguments for {@link DeepgramTTS}. */
type DeepgramTTSArgs = {
    client?: DeepgramClient;
};

/**
 * Text-to-speech backend that extends {@link MagmaFlowTextToSpeech}.
 * Adds `setup` (returns a promise) and `reset`, and accepts `null` text in
 * `input`.
 */
declare class DeepgramTTS extends MagmaFlowTextToSpeech {
    constructor(args: DeepgramTTSArgs);
    private client;
    setup(): Promise<void>;
    input(text: string | null, requestId: string): void;
    kill(): void;
    reset(): void;
}

/**
 * Named voices mapped to opaque string identifiers.
 * `ElevenLabsTTSArgs.voice` also accepts any other string.
 */
declare enum ElevenLabsVoice {
    chris = "iP95p4xoKVk53GoZ742B",
    josh = "TxGEqnHWrfWFTfGW9XjX",
    rachel = "21m00Tcm4TlvDq8ikWAM",
    laura = "FGY2WhTYpPnrIDTdsKH5",
    felicity = "aTbnroHRGIomiKpqAQR8"
}

/**
 * Request options for a streamed speech call. Not exported; it is used only
 * to derive {@link ElevenLabsConfig}.
 * NOTE(review): field names look like the ElevenLabs streaming
 * text-to-speech request body; confirm against the ElevenLabs API reference.
 */
type StreamSpeechConfig = {
    text: string;
    model_id?: string;
    language_code?: string;
    voice_settings?: {
        stability?: number;
        use_speaker_boost?: boolean;
        similarity_boost?: number;
        style?: number;
        speed?: number;
    };
    pronunciation_dictionary_locators?: Array<{
        pronunciation_dictionary_id: string;
        version_id?: string;
    }>;
    seed?: number;
    next_text?: string;
    previous_request_ids?: string[];
    next_request_ids?: string[];
    apply_text_normalization?: 'auto' | 'on' | 'off';
    apply_language_text_normalization?: boolean;
};

/** {@link StreamSpeechConfig} without `text` and `model_id`. */
type ElevenLabsConfig = Omit<StreamSpeechConfig, 'text' | 'model_id'>;

/** Constructor arguments for {@link ElevenLabsTTS}. `model` and `voice` are required. */
type ElevenLabsTTSArgs = {
    model: string;
    voice: ElevenLabsVoice | string;
    config?: ElevenLabsConfig;
    apiKey?: string;
};

/**
 * Text-to-speech backend that extends {@link MagmaFlowTextToSpeech}.
 * Adds `setup` (returns a promise) and `reset`, and accepts `null` text in
 * `input`.
 */
declare class ElevenLabsTTS extends MagmaFlowTextToSpeech {
    constructor(args: ElevenLabsTTSArgs);

    private apiKey;
    private model;
    private voice;
    private config;

    setup(): Promise<void>;
    input(text: string | null, requestId: string): void;
    kill(): void;
    reset(): void;
}

/** Constructor arguments for {@link HumeTTS}. */
type HumeTTSArgs = {
    client?: HumeClient;
};

/**
 * Text-to-speech backend that extends {@link MagmaFlowTextToSpeech}.
 * Adds `setup` (returns a promise) and `reset`, and accepts `null` text in
 * `input`.
 */
declare class HumeTTS extends MagmaFlowTextToSpeech {
    constructor(args: HumeTTSArgs);
    private client;
    setup(): Promise<void>;
    input(text: string | null, requestId: string): void;
    kill(): void;
    reset(): void;
}

/** Constructor arguments for {@link WhisperTTS}; the optional client is an `OpenAI` instance. */
type WhisperTTSArgs = {
    client?: OpenAI;
};

/**
 * Text-to-speech backend that extends {@link MagmaFlowTextToSpeech}.
 * Adds `setup` (returns a promise) and `reset`, and accepts `null` text in
 * `input`.
 */
declare class WhisperTTS extends MagmaFlowTextToSpeech {
    constructor(args: WhisperTTSArgs);
    private client;
    setup(): Promise<void>;
    input(text: string | null, requestId: string): void;
    kill(): void;
    reset(): void;
}

/**
 * Splits `text` into an array of strings.
 *
 * @param text - The text to split.
 * @param targetLength - Optional length hint. Its default and exact meaning
 *   are not visible in this declaration.
 * @returns The resulting chunks.
 */
declare function splitTextIntoChunks(text: string, targetLength?: number): string[];

export {
    DeepgramLanguage,
    DeepgramModel,
    DeepgramSTT,
    type DeepgramSTTArgs,
    DeepgramTTS,
    type DeepgramTTSArgs,
    type DeepgramWord,
    ElevenLabsTTS,
    type ElevenLabsTTSArgs,
    ElevenLabsVoice,
    HumeTTS,
    type HumeTTSArgs,
    MagmaFlow,
    type MagmaFlowArgs,
    type MagmaFlowSTTOutput,
    MagmaFlowSpeechToText,
    MagmaFlowTextToSpeech,
    type Turn,
    WhisperTTS,
    type WhisperTTSArgs,
    splitTextIntoChunks,
};