/*
 * @pompeii-labs/audio
 * Version: (not specified)
 * The Audio SDK from Pompeii Labs
 * 197 lines (186 loc) • 5.49 kB
 * TypeScript
 */
import { A as AudioFormat } from './index-o4B-ThOL.js';
import { DeepgramClient, LiveSchema } from '@deepgram/sdk';
import { HumeClient } from 'hume';
import OpenAI from 'openai';
/**
 * One entry of the optional `turns` list carried by `MagmaFlowSTTOutput`.
 *
 * Only `text` and `confidence` are required; every other field may be absent.
 */
type Turn = {
    /** Numeric speaker identifier — presumably a diarization index; confirm against the STT provider. */
    speaker?: number;
    /** Text of this turn. */
    text: string;
    /** Confidence score for this turn — range is not visible here (presumably 0–1; TODO confirm). */
    confidence: number;
    /** When the turn began, if known. */
    start?: Date;
    /** When the turn ended, if known. */
    end?: Date;
    /** Length of the turn in milliseconds, if known. */
    durationMs?: number;
};
/**
 * Payload handed to `MagmaFlowSpeechToText.onOutput` and to the
 * `onTranscription` callback of `MagmaFlowArgs`.
 */
type MagmaFlowSTTOutput = {
    /** Transcribed text. */
    text: string;
    /** Optional per-turn breakdown of the transcription. */
    turns?: Turn[];
};
/**
 * Base class for speech-to-text providers used by `MagmaFlow`.
 *
 * Subclasses must implement `input`, `flush` and `kill`. `onSpeechDetected`
 * and `onOutput` are concrete members on the base class; how they get wired
 * to the caller's callbacks is not visible in this declaration
 * (presumably `MagmaFlow` replaces them — TODO confirm).
 */
declare abstract class MagmaFlowSpeechToText {
    constructor();

    /** Supplies a buffer of audio to the provider. */
    abstract input(audio: Buffer): void;

    /** Flushes the provider — exact semantics are implementation-defined. */
    abstract flush(): void;

    /** Shuts the provider down. */
    abstract kill(): void;

    /** Hook for a speech-detected event. */
    onSpeechDetected(): void;

    /** Hook that receives a transcription result. */
    onOutput(output: MagmaFlowSTTOutput): void;
}
/**
 * Base class for text-to-speech providers used by `MagmaFlow`.
 *
 * Subclasses must implement `input` and `kill`; `onOutput` is a concrete
 * member on the base class.
 *
 * NOTE(review): the abstract `input` takes `string`, while the concrete
 * providers declared in this file (`DeepgramTTS`, `ElevenLabsTTS`, `HumeTTS`,
 * `WhisperTTS`) accept `string | null`. Confirm whether the base signature
 * should be widened to match.
 */
declare abstract class MagmaFlowTextToSpeech {
    constructor();

    /**
     * Submits text for synthesis.
     *
     * @param text - Text to synthesize.
     * @param requestId - Identifier for this request; it is also passed back to `onOutput`.
     */
    abstract input(text: string, requestId: string): void;

    /** Shuts the provider down. */
    abstract kill(): void;

    /**
     * Hook that receives synthesized audio.
     *
     * @param audio - An audio buffer, or `null` (presumably an end-of-audio marker — TODO confirm).
     * @param requestId - Identifier of the request the audio belongs to.
     */
    onOutput(audio: Buffer | null, requestId: string): void;
}
/**
 * Optional tuning knobs for `MagmaFlow`. Defaults are not visible in this
 * declaration.
 */
type MagmaFlowConfig = {
    /** A pause length in milliseconds — how it is applied is not visible here. */
    pauseDurationMs?: number;
    /** Chunk length used for sentences — presumably a character count; TODO confirm. */
    sentenceChunkLength?: number;
};
/**
 * Constructor arguments for `MagmaFlow`.
 *
 * Everything except `config` is required.
 */
type MagmaFlowArgs = {
    /** Speech-to-text provider. */
    stt: MagmaFlowSpeechToText;
    /** Text-to-speech provider. */
    tts: MagmaFlowTextToSpeech;
    /** Format of the audio passed in. */
    inputFormat: AudioFormat;
    /** Format of the audio handed to `onAudioOutput`. */
    outputFormat: AudioFormat;
    /** Called when speech is detected. */
    onSpeechDetected: () => void;
    /** Called with each transcription result. */
    onTranscription: (transcription: MagmaFlowSTTOutput) => void;
    /** Called with each buffer of output audio. */
    onAudioOutput: (audio: Buffer) => void;
    /** Optional tuning parameters. */
    config?: MagmaFlowConfig;
};
/**
 * Combines a speech-to-text provider and a text-to-speech provider behind a
 * single audio-in / text-in interface.
 *
 * Private members appear here without types (they are erased in the
 * declaration output), so their exact roles are not visible from this file.
 */
declare class MagmaFlow {
    // Providers and audio formats.
    private stt;
    private tts;
    private inputFormat;
    private outputFormat;
    private onAudioOutput;

    // Internal text/audio state — types and semantics are not visible here.
    private textBuffer;
    private textQueue;
    private generatingAudio;
    private currentRequestId;
    private audioBuffer;
    private lastChunk;
    private config;

    constructor(args: MagmaFlowArgs);

    /** Accepts a buffer of input audio (presumably forwarded to the STT provider — TODO confirm). */
    inputAudio(audio: Buffer): void;

    /**
     * Accepts text to be spoken.
     *
     * @param text - Text, or `null` (presumably an end-of-text/flush marker — TODO confirm).
     */
    inputText(text: string | null): void;

    private generateAudio;
    private sendAudio;

    /** Interrupts text-to-speech output; exact effect on queued text/audio is not visible here. */
    interruptTTS(): void;

    /** Shuts the flow down. */
    kill(): void;
}
/** Deepgram model identifiers accepted by `DeepgramSTTArgs.model`. */
declare enum DeepgramModel {
    /** The `"nova-3"` model identifier. */
    NOVA_3 = "nova-3"
}
/**
 * Language codes for Deepgram.
 *
 * Not referenced by any other declaration in this file; presumably intended
 * for a `language` option in `DeepgramConfig` — TODO confirm.
 */
declare enum DeepgramLanguage {
    /** The `"en-US"` language code. */
    EN_US = "en-US"
}
/**
 * Shape of a single word entry in a Deepgram transcription result.
 *
 * Field names use snake_case, presumably mirroring the Deepgram response
 * payload — TODO confirm.
 */
type DeepgramWord = {
    /** The word text. */
    word: string;
    /** Start offset of the word — presumably seconds from stream start; TODO confirm units. */
    start: number;
    /** End offset of the word — same units as `start`. */
    end: number;
    /** Confidence score for the word. */
    confidence: number;
    /** Language associated with the word. */
    language: string;
    /** The word with punctuation applied. */
    punctuated_word: string;
    /** Numeric speaker identifier, when present. */
    speaker?: number;
};
/**
 * Deepgram live-transcription options that callers may supply.
 *
 * This is `LiveSchema` from `@deepgram/sdk` minus the keys listed below;
 * those are presumably set by `DeepgramSTT` itself — TODO confirm.
 */
type DeepgramConfig = Omit<
    LiveSchema,
    | 'model'
    | 'vad_events'
    | 'interim_results'
    | 'encoding'
    | 'sample_rate'
    | 'channels'
    | 'endpointing'
>;
/** Constructor arguments for `DeepgramSTT`. */
type DeepgramSTTArgs = {
    /** Optional Deepgram client; behavior when omitted is not visible here. */
    client?: DeepgramClient;
    /** Model to transcribe with (required). */
    model: DeepgramModel;
    /** Additional Deepgram live options. */
    config?: DeepgramConfig;
};
/**
 * Deepgram-backed implementation of `MagmaFlowSpeechToText`.
 *
 * Only `input`, `flush` and `kill` are public beyond the constructor; the
 * private members are untyped in this declaration and their behavior is not
 * visible from here.
 */
declare class DeepgramSTT extends MagmaFlowSpeechToText {
    // Internal state.
    private client;
    private connection;
    private config;
    private turnBuffer;
    private utteranceEnded;

    constructor(args: DeepgramSTTArgs);

    private setup;

    /** Supplies a buffer of audio for transcription. */
    input(audio: Buffer): void;

    /** Flushes the provider. */
    flush(): void;

    /** Shuts the provider down. */
    kill(): void;

    // Internal handlers and helpers.
    private handleTranscriptionEvent;
    private handleUtteranceEnd;
    private sendOutput;
    private onOpen;
    private keepAlive;
    private computeTurns;
}
/** Constructor arguments for `DeepgramTTS`. */
type DeepgramTTSArgs = {
    /** Optional Deepgram client; behavior when omitted is not visible here. */
    client?: DeepgramClient;
};
/**
 * Deepgram-backed implementation of `MagmaFlowTextToSpeech`.
 *
 * Adds a public async `setup` and a `reset` on top of the base-class contract.
 */
declare class DeepgramTTS extends MagmaFlowTextToSpeech {
    private client;

    constructor(args: DeepgramTTSArgs);

    /** Asynchronous initialization; whether callers must await it before `input` is not visible here. */
    setup(): Promise<void>;

    /**
     * Submits text for synthesis.
     *
     * @param text - Text to synthesize, or `null` (meaning not visible here — presumably a flush/end marker).
     * @param requestId - Identifier for this request.
     */
    input(text: string | null, requestId: string): void;

    /** Shuts the provider down. */
    kill(): void;

    /** Resets the provider; exact effect is not visible here. */
    reset(): void;
}
/**
 * Named presets for `ElevenLabsTTSArgs.voice`.
 *
 * Each member maps a name to an opaque identifier string (presumably an
 * ElevenLabs voice ID — TODO confirm). `ElevenLabsTTSArgs.voice` also accepts
 * any plain string.
 */
declare enum ElevenLabsVoice {
    chris = "iP95p4xoKVk53GoZ742B",
    josh = "TxGEqnHWrfWFTfGW9XjX",
    rachel = "21m00Tcm4TlvDq8ikWAM",
    laura = "FGY2WhTYpPnrIDTdsKH5",
    felicity = "aTbnroHRGIomiKpqAQR8"
}
/**
 * Request options for streamed speech synthesis.
 *
 * Field names are snake_case, presumably mirroring the ElevenLabs request
 * body — TODO confirm. Only `text` is required.
 */
type StreamSpeechConfig = {
    /** Text to synthesize. */
    text: string;
    /** Identifier of the model to use. */
    model_id?: string;
    /** Language code for the request. */
    language_code?: string;
    /** Per-request voice tuning; valid ranges are not visible here. */
    voice_settings?: {
        stability?: number;
        use_speaker_boost?: boolean;
        similarity_boost?: number;
        style?: number;
        speed?: number;
    };
    /** Pronunciation dictionaries to apply, each optionally pinned to a version. */
    pronunciation_dictionary_locators?: Array<{
        pronunciation_dictionary_id: string;
        version_id?: string;
    }>;
    /** Numeric seed. */
    seed?: number;
    /** Text that follows this request. */
    next_text?: string;
    /** Identifiers of preceding requests. */
    previous_request_ids?: string[];
    /** Identifiers of following requests. */
    next_request_ids?: string[];
    /** Text normalization mode. */
    apply_text_normalization?: 'auto' | 'on' | 'off';
    /** Whether language-specific text normalization is applied. */
    apply_language_text_normalization?: boolean;
};
/**
 * Options accepted by `ElevenLabsTTSArgs.config`: `StreamSpeechConfig` without
 * `text` and `model_id` (the text arrives via `ElevenLabsTTS.input`, and the
 * model via `ElevenLabsTTSArgs.model`).
 */
type ElevenLabsConfig = Omit<
    StreamSpeechConfig,
    | 'text'
    | 'model_id'
>;
/** Constructor arguments for `ElevenLabsTTS`. */
type ElevenLabsTTSArgs = {
    /** Model identifier (required). */
    model: string;
    /** Voice to use: an `ElevenLabsVoice` preset or any other string identifier. */
    voice: ElevenLabsVoice | string;
    /** Additional request options. */
    config?: ElevenLabsConfig;
    /** API key; behavior when omitted is not visible here (presumably read from the environment — TODO confirm). */
    apiKey?: string;
};
/**
 * ElevenLabs-backed implementation of `MagmaFlowTextToSpeech`.
 *
 * Adds a public async `setup` and a `reset` on top of the base-class contract.
 */
declare class ElevenLabsTTS extends MagmaFlowTextToSpeech {
    // Internal state (untyped in this declaration).
    private apiKey;
    private model;
    private voice;
    private config;

    constructor(args: ElevenLabsTTSArgs);

    /** Asynchronous initialization; whether callers must await it before `input` is not visible here. */
    setup(): Promise<void>;

    /**
     * Submits text for synthesis.
     *
     * @param text - Text to synthesize, or `null` (meaning not visible here — presumably a flush/end marker).
     * @param requestId - Identifier for this request.
     */
    input(text: string | null, requestId: string): void;

    /** Shuts the provider down. */
    kill(): void;

    /** Resets the provider; exact effect is not visible here. */
    reset(): void;
}
/** Constructor arguments for `HumeTTS`. */
type HumeTTSArgs = {
    /** Optional Hume client; behavior when omitted is not visible here. */
    client?: HumeClient;
};
/**
 * Hume-backed implementation of `MagmaFlowTextToSpeech`.
 *
 * Adds a public async `setup` and a `reset` on top of the base-class contract.
 */
declare class HumeTTS extends MagmaFlowTextToSpeech {
    private client;

    constructor(args: HumeTTSArgs);

    /** Asynchronous initialization; whether callers must await it before `input` is not visible here. */
    setup(): Promise<void>;

    /**
     * Submits text for synthesis.
     *
     * @param text - Text to synthesize, or `null` (meaning not visible here — presumably a flush/end marker).
     * @param requestId - Identifier for this request.
     */
    input(text: string | null, requestId: string): void;

    /** Shuts the provider down. */
    kill(): void;

    /** Resets the provider; exact effect is not visible here. */
    reset(): void;
}
/** Constructor arguments for `WhisperTTS`. */
type WhisperTTSArgs = {
    /** Optional OpenAI client; behavior when omitted is not visible here. */
    client?: OpenAI;
};
/**
 * OpenAI-backed implementation of `MagmaFlowTextToSpeech` (it takes an
 * `OpenAI` client via `WhisperTTSArgs`).
 *
 * Adds a public async `setup` and a `reset` on top of the base-class contract.
 */
declare class WhisperTTS extends MagmaFlowTextToSpeech {
    private client;

    constructor(args: WhisperTTSArgs);

    /** Asynchronous initialization; whether callers must await it before `input` is not visible here. */
    setup(): Promise<void>;

    /**
     * Submits text for synthesis.
     *
     * @param text - Text to synthesize, or `null` (meaning not visible here — presumably a flush/end marker).
     * @param requestId - Identifier for this request.
     */
    input(text: string | null, requestId: string): void;

    /** Shuts the provider down. */
    kill(): void;

    /** Resets the provider; exact effect is not visible here. */
    reset(): void;
}
/**
 * Splits a string into an array of chunks.
 *
 * @param text - The text to split.
 * @param targetLength - Optional target size per chunk; the default and the
 *   exact splitting rules are not visible in this declaration (presumably a
 *   character count — TODO confirm).
 * @returns The chunks as an array of strings.
 */
declare function splitTextIntoChunks(
    text: string,
    targetLength?: number,
): string[];
export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenLabsVoice, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, type MagmaFlowSTTOutput, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, type Turn, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };