/**
 * susurro-audio
 * Version:
 * 🎙️ Real-time conversational audio with AI transcription. Build ChatGPT-style voice interfaces in minutes with <300ms latency.
 * 502 lines (491 loc) • 16.8 kB
 * TypeScript
 */
/**
 * Options for Whisper transcription.
 *
 * Every field is optional; the defaults applied when a field is omitted are
 * not visible in this declaration file.
 */
interface WhisperConfig {
  /** API key supplied with the configuration. */
  apiKey?: string;
  /**
   * Model identifier. Any string is accepted.
   *
   * Written as `'whisper-1' | (string & {})` rather than `'whisper-1' | string`:
   * the latter collapses to plain `string`, which drops the literal from editor
   * suggestions. Both forms accept exactly the same values.
   */
  model?: 'whisper-1' | (string & {});
  /** Language setting (format not specified here; presumably a language code). */
  language?: string;
  /** Temperature value passed to the model. */
  temperature?: number;
  /** Optional prompt text. */
  prompt?: string;
  /** Output format requested from the transcription. */
  responseFormat?: 'json' | 'text' | 'srt' | 'verbose_json' | 'vtt';
}
/** Result of a transcription, tagged with the chunk it belongs to. */
interface TranscriptionResult {
  /** Transcribed text. */
  text: string;
  /** Language of the transcription, when reported. */
  language?: string;
  /** Duration of the transcribed audio, when reported (unit not specified here). */
  duration?: number;
  /** Per-segment detail, when reported. */
  segments?: Array<TranscriptionSegment>;
  /** Index of the chunk this result was produced for. */
  chunkIndex: number;
  /** Time attached to the result (presumably epoch milliseconds; confirm against the producer). */
  timestamp: number;
}
/** A slice of audio together with its timing information. */
interface AudioChunk {
  /** Identifier of the chunk. */
  id: string;
  /** Audio data of the chunk. */
  blob: Blob;
  /** Length of the chunk (unit not specified here). */
  duration: number;
  /** Start of the chunk (time base not specified here). */
  startTime: number;
  /** End of the chunk. */
  endTime: number;
  /** Voice-activity score for the chunk, when available. */
  vadScore?: number;
}
/** Snapshot of where processing currently stands. */
interface ProcessingStatus {
  /** Whether processing is in progress. */
  isProcessing: boolean;
  /** Number of the chunk currently being handled. */
  currentChunk: number;
  /** Total number of chunks. */
  totalChunks: number;
  /** Current stage. */
  stage:
    | 'idle'
    | 'recording'
    | 'processing'
    | 'transcribing'
    | 'complete'
    | 'error';
}
/**
 * One segment of a transcription.
 *
 * The snake_case field names appear to mirror the segment objects of
 * Whisper's verbose JSON output (confirm against the producer).
 */
interface TranscriptionSegment {
  /** Segment identifier. */
  id: number;
  /** Seek value of the segment. */
  seek: number;
  /** Start of the segment. */
  start: number;
  /** End of the segment. */
  end: number;
  /** Text of the segment. */
  text: string;
  /** Token ids of the segment. */
  tokens: number[];
  /** Temperature used for the segment. */
  temperature: number;
  /** Average log probability of the segment. */
  avg_logprob: number;
  /** Compression ratio of the segment. */
  compression_ratio: number;
  /** Probability that the segment contains no speech. */
  no_speech_prob: number;
}
/** A conversational chunk: an audio reference paired with its transcript. */
interface SusurroChunk {
  /** Identifier of the chunk. */
  id: string;
  /** URL of the chunk's audio. */
  audioUrl: string;
  /** Transcript of the chunk. */
  transcript: string;
  /** Start of the chunk (time base not specified here). */
  startTime: number;
  /** End of the chunk. */
  endTime: number;
  /** Voice-activity score of the chunk. */
  vadScore: number;
  /** Whether the chunk is complete. */
  isComplete: boolean;
  /** Processing latency for the chunk, when measured (unit not specified here). */
  processingLatency?: number;
  /** Free-form extra data attached to the chunk. */
  metadata?: Record<string, unknown>;
}
/** Callback that receives a {@link SusurroChunk}; its return value is ignored. */
type OnChunkCallback = (chunk: SusurroChunk) => void;
/** Settings for conversational (chunk-by-chunk) operation. All fields are optional. */
interface ConversationalOptions {
  /** Callback that receives chunks. */
  onChunk?: OnChunkCallback;
  /** Toggles instant transcription. */
  enableInstantTranscription?: boolean;
  /** Chunk timeout (unit not specified here; presumably milliseconds). */
  chunkTimeout?: number;
  /** Toggles chunk enrichment. */
  enableChunkEnrichment?: boolean;
}
/** Base option set that `UseSusurroOptions` extends. All fields are optional. */
interface UseSusurroOptions$1 {
  /** Chunk duration in milliseconds. */
  chunkDurationMs?: number;
  /** Toggles voice-activity detection. */
  enableVAD?: boolean;
  /** Whisper settings. */
  whisperConfig?: WhisperConfig;
  /** Conversational-mode settings. */
  conversational?: ConversationalOptions;
  /** Receives Whisper progress log messages, optionally tagged with a severity. */
  onWhisperProgressLog?: (
    message: string,
    type?: 'info' | 'warning' | 'error' | 'success',
  ) => void;
}
/** Audio-engine switches and settings. All fields are optional. */
interface AudioEngineConfig {
  /** Toggles voice-activity detection. */
  enableVAD?: boolean;
  /** Toggles noise suppression. */
  enableNoiseSuppression?: boolean;
  /** Toggles echo cancellation. */
  enableEchoCancellation?: boolean;
  /** Voice-activity threshold (scale not specified here). */
  vadThreshold?: number;
  /** Path to the WASM asset. */
  wasmPath?: string;
}
/** Result of processing and transcribing a complete audio input. */
interface CompleteAudioResult {
  /** URL of the original audio. */
  originalAudioUrl: string;
  /** URL of the processed audio. */
  processedAudioUrl: string;
  /** Transcribed text. */
  transcriptionText: string;
  /** Per-segment transcription detail, when available. */
  transcriptionSegments?: Array<TranscriptionSegment>;
  /** Voice-activity analysis of the audio. */
  vadAnalysis: VADAnalysisResult;
  /** Descriptive data about the audio. */
  metadata: AudioMetadata;
  /** Time spent processing (unit not specified here). */
  processingTime: number;
}
/** Per-recording settings. All fields are optional. */
interface RecordingConfig {
  /** Chunk duration (unit not specified here). */
  chunkDuration?: number;
  /** Voice-activity threshold (scale not specified here). */
  vadThreshold?: number;
  /** Toggles real-time transcription. */
  enableRealTimeTranscription?: boolean;
  /** Toggles noise reduction. */
  enableNoiseReduction?: boolean;
}
/** Chunk delivered while a streaming recording is running. */
interface StreamingSusurroChunk {
  /** Identifier of the chunk. */
  id: string;
  /** Audio data of the chunk. */
  audioBlob: Blob;
  /** Transcribed text of the chunk. */
  transcriptionText: string;
  /** Voice-activity score of the chunk. */
  vadScore: number;
  /** Time attached to the chunk (time base not specified here). */
  timestamp: number;
  /** Length of the chunk (unit not specified here). */
  duration: number;
  /** Whether voice was active in the chunk. */
  isVoiceActive: boolean;
}
/** Output of a voice-activity analysis. */
interface VADAnalysisResult {
  /** Average voice-activity score. */
  averageVad: number;
  /** Individual voice-activity scores. */
  vadScores: number[];
  /** Metric records collected during the analysis. */
  metrics: Array<ProcessingMetrics>;
  /** Detected voice segments. */
  voiceSegments: Array<VoiceSegment>;
}
/** A span in which voice was detected. */
interface VoiceSegment {
  /** Start of the span (time base not specified here). */
  startTime: number;
  /** End of the span. */
  endTime: number;
  /** Voice-activity score of the span. */
  vadScore: number;
  /** Confidence attached to the span (scale not specified here). */
  confidence: number;
}
/** Descriptive data about a piece of audio before and after processing. */
interface AudioMetadata {
  /** Length of the audio (unit not specified here). */
  duration: number;
  /** Sample rate of the audio. */
  sampleRate: number;
  /** Number of channels. */
  channels: number;
  /** Size of the input file. */
  fileSize: number;
  /** Size after processing. */
  processedSize: number;
}
/** One record of processing metrics. Every field is optional. */
interface ProcessingMetrics {
  /** Voice-activity value. */
  vad?: number;
  /** Energy value. */
  energy?: number;
  /** Pitch value. */
  pitch?: number;
  /** Signal-to-noise value. */
  snr?: number;
  /** Time attached to the record. */
  timestamp?: number;
  /** Frame number of the record. */
  frame?: number;
}
/** A named, asynchronous transformation step applied to a chunk. */
interface ChunkMiddleware {
  /** Name of the middleware; the pipeline's register/enable/disable methods take a name. */
  name: string;
  /** Receives a chunk and resolves with a chunk. */
  process: (chunk: SusurroChunk) => Promise<SusurroChunk>;
  /** Whether the middleware is enabled. */
  enabled: boolean;
  /** Priority value (ordering direction not specified here). */
  priority: number;
}
/** Context information accompanying a middleware run. */
interface MiddlewareContext {
  /** Start time (time base not specified here). */
  startTime: number;
  /** Stage of processing, relative to chunk emission. */
  processingStage: 'pre-emit' | 'post-emit';
  /** Free-form extra data. */
  metadata: Record<string, unknown>;
}
// Ready-made middlewares. Only their type (ChunkMiddleware) is declared here;
// what each `process` step actually does is not visible in this file.
/** Translation middleware (behaviour inferred from the name only; confirm against the implementation). */
declare const translationMiddleware: ChunkMiddleware;
/** Sentiment middleware (behaviour inferred from the name only; confirm against the implementation). */
declare const sentimentMiddleware: ChunkMiddleware;
/** Intent middleware (behaviour inferred from the name only; confirm against the implementation). */
declare const intentMiddleware: ChunkMiddleware;
/** Quality middleware (behaviour inferred from the name only; confirm against the implementation). */
declare const qualityMiddleware: ChunkMiddleware;
/**
 * Registry of {@link ChunkMiddleware} entries that can process a chunk.
 *
 * Middlewares are added as objects and afterwards addressed by name.
 */
declare class ChunkMiddlewarePipeline {
  private middlewares;

  constructor();

  /** Adds a middleware. */
  register(middleware: ChunkMiddleware): void;

  /** Removes the middleware with the given name. */
  unregister(name: string): void;

  /** Enables the middleware with the given name. */
  enable(name: string): void;

  /** Disables the middleware with the given name. */
  disable(name: string): void;

  /** Processes a chunk and resolves with the resulting chunk. */
  process(chunk: SusurroChunk): Promise<SusurroChunk>;

  /** Returns name, enabled flag and priority for each middleware. */
  getStatus(): Array<Pick<ChunkMiddleware, 'name' | 'enabled' | 'priority'>>;
}
/** Latency measurements recorded for one chunk (units not specified here). */
interface LatencyMetrics {
  /** Latency from audio to emission. */
  audioToEmitLatency: number;
  /** Latency of audio processing. */
  audioProcessingLatency: number;
  /** Latency of transcription. */
  transcriptionLatency: number;
  /** Latency of middleware. */
  middlewareLatency: number;
  /** Identifier of the measured chunk. */
  chunkId: string;
  /** Time of the measurement. */
  timestamp: number;
  /** Voice-activity score of the chunk, when available. */
  vadScore?: number;
  /** Size of the audio, when available. */
  audioSize?: number;
}
/** Aggregated latency statistics over a set of samples (units not specified here). */
interface LatencyReport {
  /** Average latency. */
  averageLatency: number;
  /** Median latency. */
  medianLatency: number;
  /** 95th-percentile latency. */
  p95Latency: number;
  /** 99th-percentile latency. */
  p99Latency: number;
  /** Smallest latency. */
  minLatency: number;
  /** Largest latency. */
  maxLatency: number;
  /** Whether the target was met. */
  targetMet: boolean;
  /** Number of samples in the report. */
  sampleCount: number;
  /** Time range the report covers. */
  timeRange: {
    start: number;
    end: number;
  };
  /** Latency split by component. */
  breakdown: {
    audioProcessing: number;
    transcription: number;
    middleware: number;
  };
}
/** Describes an optimization, the condition that selects it and how to apply it. */
interface PerformanceOptimization {
  /** Name of the optimization. */
  name: string;
  /** Human-readable description. */
  description: string;
  /** Expected latency reduction (unit not specified here). */
  expectedLatencyReduction: number;
  /** Predicate evaluated against a set of latency metrics. */
  condition: (metrics: LatencyMetrics) => boolean;
  /** Applies the optimization asynchronously. */
  apply: () => Promise<void>;
}
/**
 * Records latency metrics and produces reports and status from them.
 *
 * Private members appear without types, as emitted in declaration files.
 */
declare class LatencyMonitor {
  private metrics;
  private maxMetrics;
  private target;
  private optimizations;
  private listeners;
  private setupOptimizations;
  private checkForOptimizations;
  private getPercentile;
  private calculateAverageBreakdown;
  private getEmptyReport;
  private emit;

  /** @param targetLatency - Optional latency target (unit not specified here). */
  constructor(targetLatency?: number);

  /** Records one set of metrics; the caller does not supply `timestamp`. */
  recordMetrics(metrics: Omit<LatencyMetrics, 'timestamp'>): void;

  /** Builds a report, optionally limited to the last N minutes. */
  generateReport(lastNMinutes?: number): LatencyReport;

  /** Returns the current health, latency and trend. */
  getRealtimeStatus(): {
    isHealthy: boolean;
    currentLatency: number;
    trend: 'improving' | 'degrading' | 'stable';
    lastOptimization?: string;
  };

  /** Adds a listener for the named event. */
  on(event: string, listener: (data: unknown) => void): void;

  /** Removes a listener for the named event. */
  off(event: string, listener: (data: unknown) => void): void;

  /** Returns the metrics as a string in the requested format. */
  exportMetrics(format?: 'json' | 'csv'): string;

  /** Clears the monitor. */
  clear(): void;

  /** Returns the number of metrics held. */
  getMetricsCount(): number;
}
/** Value returned by `useLatencyMonitor`. */
interface UseLatencyMonitorReturn {
  /** Current latency report. */
  latencyReport: LatencyReport;
  /** Current health, latency and trend. */
  latencyStatus: {
    isHealthy: boolean;
    currentLatency: number;
    trend: 'improving' | 'degrading' | 'stable';
    lastOptimization?: string;
  };
  /** Records one set of metrics; the caller does not supply `timestamp`. */
  recordMetrics: (metrics: Omit<LatencyMetrics, 'timestamp'>) => void;
  /** Returns the metrics as a string in the requested format. */
  exportMetrics: (format?: 'json' | 'csv') => string;
  /** Clears the monitor. */
  clear: () => void;
  /** Returns the number of metrics held. */
  getMetricsCount: () => number;
  /** Adds an optimization listener. */
  onOptimization: (listener: (data: unknown) => void) => void;
  /** Removes an optimization listener. */
  offOptimization: (listener: (data: unknown) => void) => void;
}
/**
 * Hook-based latency monitor.
 * Replaces the singleton latencyMonitor with modern React patterns.
 *
 * @param targetLatency - Optional latency target (unit not stated here; presumably milliseconds, confirm).
 * @returns The current report and status, plus functions to record, export,
 *   clear and count metrics and to add or remove optimization listeners.
 */
declare function useLatencyMonitor(targetLatency?: number): UseLatencyMonitorReturn;
/**
 * Options accepted by `useSusurro`.
 *
 * Adds model selection and engine tuning to the base options. The
 * `onWhisperProgressLog` callback is inherited from the base interface with
 * the same signature, so it is not redeclared here.
 */
interface UseSusurroOptions extends UseSusurroOptions$1 {
  /** Whisper model size to start with. */
  initialModel?: 'tiny' | 'base' | 'small' | 'medium' | 'large';
  /** Engine tuning. All fields are optional. */
  engineConfig?: {
    /** Buffer size. */
    bufferSize?: number;
    /** Denoise strength (scale not specified here). */
    denoiseStrength?: number;
    /** Toggles metrics. */
    enableMetrics?: boolean;
    /** Noise-reduction level. */
    noiseReductionLevel?: 'low' | 'medium' | 'high';
    /** Noise-reduction algorithm; `'rnnoise'` is the only declared value. */
    algorithm?: 'rnnoise';
  };
}
/** Everything `useSusurro` returns: state, controls and helpers. */
interface UseSusurroReturn {
  // Recording and processing state
  /** Whether recording is in progress. */
  isRecording: boolean;
  /** Whether processing is in progress. */
  isProcessing: boolean;
  /** Transcription results collected so far. */
  transcriptions: Array<TranscriptionResult>;
  /** Audio chunks collected so far. */
  audioChunks: Array<AudioChunk>;
  /** Current processing status. */
  processingStatus: ProcessingStatus;
  /** Average voice-activity score. */
  averageVad: number;

  // Recording controls
  /** Starts recording, optionally with per-recording settings. */
  startRecording: (config?: RecordingConfig) => Promise<void>;
  /** Stops recording. */
  stopRecording: () => void;
  /** Pauses recording. */
  pauseRecording: () => void;
  /** Resumes recording. */
  resumeRecording: () => void;
  /** Clears the collected transcriptions. */
  clearTranscriptions: () => void;

  // Whisper
  /** Whether Whisper is ready. */
  whisperReady: boolean;
  /** Whisper progress value (scale not specified here). */
  whisperProgress: number;
  /** Whisper error, or `null` when there is none. */
  whisperError: Error | string | null;
  /** Transcribes a blob; resolves with a result or `null`. */
  transcribeWithWhisper: (blob: Blob) => Promise<TranscriptionResult | null>;
  /** Resolves with a blob for the chunk with the given id. */
  exportChunkAsWav: (chunkId: string) => Promise<Blob>;

  // Conversational mode
  /** Conversational chunks collected so far. */
  conversationalChunks: Array<SusurroChunk>;
  /** Clears the conversational chunks. */
  clearConversationalChunks: () => void;
  /** Middleware pipeline in use. */
  middlewarePipeline: ChunkMiddlewarePipeline;

  // Latency monitoring (same types as returned by `useLatencyMonitor`)
  /** Current latency report. */
  latencyReport: UseLatencyMonitorReturn['latencyReport'];
  /** Current latency status. */
  latencyStatus: UseLatencyMonitorReturn['latencyStatus'];

  // Audio engine
  /** Initializes the audio engine. */
  initializeAudioEngine: () => Promise<void>;
  /** Resets the audio engine. */
  resetAudioEngine: () => Promise<void>;
  /** Whether the engine is initialized. */
  isEngineInitialized: boolean;
  /** Engine error message, or `null` when there is none. */
  engineError: string | null;
  /** Whether engine initialization is in progress. */
  isInitializingEngine: boolean;

  // File and streaming helpers
  /** Processes and transcribes a file. */
  processAndTranscribeFile: (file: File) => Promise<CompleteAudioResult>;
  /** Starts a streaming recording; `onChunk` receives streaming chunks. */
  startStreamingRecording: (
    onChunk: (chunk: StreamingSusurroChunk) => void,
    config?: RecordingConfig,
  ) => Promise<void>;
  /** Stops the streaming recording and resolves with streaming chunks. */
  stopStreamingRecording: () => Promise<StreamingSusurroChunk[]>;
  /** Runs voice-activity analysis on a buffer. */
  analyzeVAD: (buffer: ArrayBuffer) => Promise<{
    averageVad: number;
    vadScores: number[];
    metrics: unknown[];
    voiceSegments: VoiceSegment[];
  }>;
  /** Converts a blob to an `ArrayBuffer`. */
  convertBlobToBuffer: (blob: Blob) => Promise<ArrayBuffer>;
  /** Current media stream, or `null` when there is none. */
  currentStream: MediaStream | null;
}
/**
 * Main hook of the package.
 *
 * @param options - Optional settings; see {@link UseSusurroOptions}.
 * @returns State, controls and helpers described by {@link UseSusurroReturn}.
 */
declare function useSusurro(options?: UseSusurroOptions): UseSusurroReturn;
/** Handle for an alert that has been shown. */
interface AlertHandle {
  /** Updates the alert with a partial configuration. */
  update: (config: Partial<AlertConfig>) => void;
  /** Closes the alert. */
  close: () => void;
}
/** Content and appearance of an alert. */
interface AlertConfig {
  /** Title of the alert. */
  title: string;
  /** Message of the alert. */
  message: string;
  /** Kind of alert. */
  type:
    | 'loading'
    | 'error'
    | 'success'
    | 'warning'
    | 'info';
  /** Progress value, when applicable (scale not specified here). */
  progress?: number;
}
/** Service with one message method per severity. */
interface ToastService {
  /** Receives a success message. */
  success: (message: string) => void;
  /** Receives an error message. */
  error: (message: string) => void;
  /** Receives a warning message. */
  warning: (message: string) => void;
  /** Receives an informational message. */
  info: (message: string) => void;
}
/** Service that shows alerts. */
interface AlertService {
  /** Shows an alert and returns a handle for it. */
  show: (config: AlertConfig) => AlertHandle;
}
/** Default {@link AlertService} instance; its behaviour is not visible in this declaration file. */
declare const defaultAlertService: AlertService;
/** Default {@link ToastService} instance; its behaviour is not visible in this declaration file. */
declare const defaultToastService: ToastService;
/** Configuration passed to `MurmurabaInstance.initializeAudioEngine`. All fields are optional. */
interface MurmurabaConfig {
  /** Toggles AGC (presumably automatic gain control). */
  enableAGC?: boolean;
  /** Toggles noise suppression. */
  enableNoiseSuppression?: boolean;
  /** Toggles echo cancellation. */
  enableEchoCancellation?: boolean;
  /** Toggles voice-activity detection. */
  enableVAD?: boolean;
  /** Path to the WASM asset. */
  wasmPath?: string;
  /** Voice-activity threshold (scale not specified here). */
  vadThreshold?: number;
}
/** One record of metrics as typed for the Murmuraba engine. Every field is optional. */
interface MurmurabaMetrics {
  /** Voice-activity value. */
  vad?: number;
  /** Energy value. */
  energy?: number;
  /** Pitch value. */
  pitch?: number;
  /** Signal-to-noise value. */
  snr?: number;
  /** Time attached to the record. */
  timestamp?: number;
}
/** A chunk as typed for the Murmuraba engine. */
interface MurmurabaChunk {
  /** Decoded audio of the chunk. */
  audioBuffer: AudioBuffer;
  /** Blob form of the chunk, when available. */
  blob?: Blob;
  /** Start of the chunk (time base not specified here). */
  startTime: number;
  /** End of the chunk. */
  endTime: number;
  /** Voice-activity score of the chunk, when available. */
  vadScore?: number;
}
/**
 * Result object as typed for the Murmuraba engine. Every field is optional.
 *
 * Both `processedBuffer` and `processedAudio` are declared with the same type;
 * which one is populated is not visible here.
 */
interface MurmurabaResult {
  /** Processed audio in one of three representations. */
  processedBuffer?: ArrayBuffer | Blob | AudioBuffer;
  /** Processed audio in one of three representations. */
  processedAudio?: ArrayBuffer | Blob | AudioBuffer;
  /** Voice-activity scores. */
  vadScores?: number[];
  /** Metric records. */
  metrics?: Array<MurmurabaMetrics>;
  /** Average voice-activity score. */
  averageVad?: number;
  /** Chunks produced. */
  chunks?: Array<MurmurabaChunk>;
}
/**
 * Shape of the Murmuraba engine object.
 *
 * Members marked optional may be absent on a given instance.
 */
interface MurmurabaInstance {
  /** Whether the engine is initialized. */
  isInitialized: boolean;

  /** Initializes the audio engine, optionally with a configuration. */
  initializeAudioEngine(config?: MurmurabaConfig): Promise<void>;

  /** Destroys the engine. */
  destroyEngine?(): Promise<void>;

  /** Processes a buffer; resolves with a result object or a raw `ArrayBuffer`. */
  processFile(
    buffer: ArrayBuffer,
    options?: Record<string, unknown>,
  ): Promise<MurmurabaResult | ArrayBuffer>;

  /** Processes a buffer; the optional callback receives metrics. */
  processFileWithMetrics?(
    buffer: ArrayBuffer,
    onFrameProcessed?: (metrics: MurmurabaMetrics) => void,
  ): Promise<MurmurabaResult>;

  /** Processes a byte stream in chunks and resolves with the chunks. */
  processStreamChunked?(
    stream: ReadableStream<Uint8Array>,
    options: {
      chunkDuration?: number;
      onChunkProcessed?: (chunk: MurmurabaChunk) => void;
    },
  ): Promise<MurmurabaChunk[]>;

  /**
   * Runs voice-activity analysis on a buffer.
   * The result declares both `averageVad` and `average`; which is populated
   * is not visible here.
   */
  analyzeVAD?(buffer: ArrayBuffer): Promise<{
    metrics?: MurmurabaMetrics[];
    averageVad?: number;
    average?: number;
    scores?: number[];
  }>;
}
/** State of the model cache. */
interface CacheStatus {
  /** Whether a cache exists. */
  hasCache: boolean;
  /** Size of the cache, when known (unit not specified here). */
  cacheSize?: number;
  /** When the cache was last updated, when known. */
  lastUpdated?: Date;
}
/** Value returned by `useModelCache`. */
interface UseModelCacheReturn {
  /** Current cache status. */
  cacheStatus: CacheStatus;
  /** Stores model data under an id. */
  storeModel: (modelId: string, data: ArrayBuffer) => Promise<void>;
  /** Resolves with the data for an id, or `null`. */
  getModel: (modelId: string) => Promise<ArrayBuffer | null>;
  /** Resolves with whether data exists for an id. */
  hasModel: (modelId: string) => Promise<boolean>;
  /** Clears the cache. */
  clearCache: () => Promise<void>;
  /** Resolves with storage usage and quota, or `null`. */
  getStorageInfo: () => Promise<
    | {
        usage: number;
        quota: number;
      }
    | null
  >;
  /** Requests persistent storage; resolves with a boolean. */
  requestPersistentStorage: () => Promise<boolean>;
  /** Refreshes `cacheStatus`. */
  refreshCacheStatus: () => Promise<void>;
}
/**
 * Hook-based model cache manager.
 * Replaces the singleton ModelCacheManager with modern React patterns.
 *
 * @returns The cache status plus asynchronous functions to store, read, check
 *   and clear models, query storage, request persistence and refresh the status.
 */
declare function useModelCache(): UseModelCacheReturn;
/**
 * Audio processing constants.
 *
 * Central place for the numeric settings used in audio handling. Every value
 * is typed as its exact literal, so this declaration mirrors the runtime
 * object one-to-one.
 */
declare const AUDIO_CONFIG: {
  /** Primary sample rate. */
  readonly SAMPLE_RATE: 44100;
  /** Alternative sample rate. */
  readonly ALTERNATIVE_SAMPLE_RATE: 48000;

  /** Voice-activity-detection settings. */
  readonly VAD: {
    readonly FRAME_SAMPLES: 1536;
    readonly POSITIVE_THRESHOLD: 0.6;
    readonly NEGATIVE_THRESHOLD: 0.4;
    readonly MIN_SPEECH_DURATION_MS: 250;
    readonly PRE_SPEECH_PAD_FRAMES: 1;
    readonly POST_SPEECH_PAD_FRAMES: 1;
  };

  /** Chunk durations for recording, in milliseconds. */
  readonly RECORDING: {
    readonly DEFAULT_CHUNK_DURATION_MS: 8000;
    readonly STREAMING_CHUNK_DURATION_MS: 3000;
    readonly MIN_CHUNK_DURATION_MS: 1000;
    readonly MAX_CHUNK_DURATION_MS: 30000;
  };

  /** Timeouts and delays, in milliseconds. */
  readonly TIMEOUTS: {
    readonly MODEL_DOWNLOAD_MS: 180000;
    readonly TRANSCRIPTION_MS: 120000;
    readonly CHUNK_EMISSION_MS: 2000;
    readonly ENGINE_RESET_DELAY_MS: 100;
    readonly DEPENDENCY_PRELOAD_MS: 2000;
  };

  /** Buffer sizes and limits. */
  readonly BUFFERS: {
    readonly AUDIO_WORKLET_SIZE: 128;
    readonly PROCESSING_BUFFER_SIZE: 4096;
    readonly MAX_AUDIO_CHUNKS: 50;
  };

  /** Level thresholds (scale not specified here). */
  readonly THRESHOLDS: {
    readonly MIN_AUDIO_LEVEL: 0.01;
    readonly SILENCE_THRESHOLD: 0.02;
    readonly NOISE_GATE: 0.001;
  };

  /** Performance-related settings. */
  readonly PERFORMANCE: {
    readonly WEBGPU_PREFERRED: true;
    readonly QUANTIZATION_BITS: 4;
    readonly MAX_CONCURRENT_WORKERS: 4;
  };
};
/**
 * Whisper model and caching constants, typed as exact literals.
 *
 * NOTE(review): the model ids are not uniform. `DISTIL_LARGE_V3` has three
 * path segments and the first `FALLBACK` entry has no `Xenova/` prefix, unlike
 * the other two. The types here must mirror the runtime values, so verify
 * the ids in the implementation rather than editing this declaration.
 */
declare const WHISPER_CONFIG: {
  /** Model identifiers. */
  readonly MODELS: {
    readonly DISTIL_LARGE_V3: "Xenova/distil-whisper/distil-large-v3";
    /** Fallback model ids, as a fixed-length readonly tuple. */
    readonly FALLBACK: readonly [
      "whisper-tiny",
      "Xenova/whisper-base",
      "Xenova/whisper-small",
    ];
  };
  /** Chunk length in seconds. */
  readonly CHUNK_LENGTH_S: 30;
  /** Stride length in seconds. */
  readonly STRIDE_LENGTH_S: 5;
  /** Cache directory name. */
  readonly CACHE_DIR: "transformers-cache";
  /** Whether the browser cache is used. */
  readonly USE_BROWSER_CACHE: true;
};
/** User-facing error message strings, typed as exact literals. */
declare const ERROR_MESSAGES: {
  /** Audio engine initialization failure. */
  readonly ENGINE_INIT_FAILED: "Failed to initialize audio engine";
  /** Transcription failure. */
  readonly TRANSCRIPTION_FAILED: "Transcription failed";
  /** VAD analysis failure. */
  readonly VAD_ANALYSIS_FAILED: "VAD analysis failed";
  /** Failure to start recording. */
  readonly RECORDING_FAILED: "Failed to start recording";
  /** Microphone access denied. */
  readonly STREAM_ACCESS_DENIED: "Microphone access denied";
  /** Whisper model load failure. */
  readonly MODEL_LOAD_FAILED: "Failed to load Whisper model";
  /** Audio context creation failure. */
  readonly AUDIO_CONTEXT_FAILED: "Failed to create audio context";
};
/**
 * Error handling utilities: centralized error handling for audio processing.
 */

/**
 * Base error for audio processing. Carries the context it was raised in, the
 * underlying error (if any) and whether it is recoverable.
 */
declare class AudioProcessingError extends Error {
  /** Context in which the error was raised. */
  readonly context: string;
  /** Underlying error or value, when one was supplied. */
  readonly originalError?: unknown;
  /** Whether the error is recoverable. */
  readonly recoverable: boolean;

  constructor(
    message: string,
    context: string,
    originalError?: unknown,
    recoverable?: boolean,
  );
}
/** {@link AudioProcessingError} subclass for transcription; constructed without a context argument. */
declare class TranscriptionError extends AudioProcessingError {
  constructor(message: string, originalError?: unknown);
}
/** {@link AudioProcessingError} subclass for recording; constructed without a context argument. */
declare class RecordingError extends AudioProcessingError {
  constructor(message: string, originalError?: unknown);
}
/** {@link AudioProcessingError} subclass for VAD; constructed without a context argument. */
declare class VADError extends AudioProcessingError {
  constructor(message: string, originalError?: unknown);
}
/**
 * Unified error handler for audio processing operations.
 *
 * The declared return type is `never`: a call does not return normally.
 *
 * @param error - The caught value, of any type.
 * @param context - Label for where the error occurred.
 * @param fallbackMessage - Optional message (presumably used when none can be
 *   derived from `error`; confirm against the implementation).
 */
declare const handleAudioError: (error: unknown, context: string, fallbackMessage?: string) => never;
/**
 * Safe error handler that logs but doesn't throw.
 *
 * @param error - The caught value, of any type.
 * @param context - Label for where the error occurred.
 * @param logger - Optional sink that receives a message and a type string.
 */
declare const logAudioError: (error: unknown, context: string, logger?: (message: string, type: string) => void) => void;
/**
 * Extract error message from unknown error type.
 *
 * @param error - The caught value, of any type.
 * @returns A message string for the value.
 */
declare const getErrorMessage: (error: unknown) => string;
/**
 * Retry handler for transient failures.
 *
 * @param operation - Asynchronous operation to run.
 * @param maxRetries - Optional retry limit (default not visible here).
 * @param initialDelay - Optional initial delay (default and unit not visible
 *   here; presumably milliseconds, confirm against the implementation).
 * @param context - Optional label for the operation.
 * @returns A promise for the operation's result type.
 */
declare const retryWithBackoff: <T>(operation: () => Promise<T>, maxRetries?: number, initialDelay?: number, context?: string) => Promise<T>;
/**
 * Error boundary hook for React components.
 *
 * @returns The current error (or `null`), a function to handle an error with
 *   a context label, and a function to reset the error.
 */
declare const useErrorHandler: () => {
  /** Current error, or `null` when there is none. */
  error: AudioProcessingError | null;
  /** Handles a caught value together with a context label. */
  handleError: (error: unknown, context: string) => void;
  /** Resets the error. */
  resetError: () => void;
};
// Public surface of the package.
// `CacheStatus`, `UseLatencyMonitorReturn` and `UseModelCacheReturn` are
// exported as types so that the return values of the exported hooks
// `useLatencyMonitor` and `useModelCache` can be named by consumers.
export {
  AUDIO_CONFIG,
  type AlertConfig,
  type AlertHandle,
  type AlertService,
  type AudioChunk,
  type AudioEngineConfig,
  type AudioMetadata,
  AudioProcessingError,
  type CacheStatus,
  type ChunkMiddleware,
  ChunkMiddlewarePipeline,
  type CompleteAudioResult,
  type ConversationalOptions,
  ERROR_MESSAGES,
  type LatencyMetrics,
  LatencyMonitor,
  type LatencyReport,
  type MiddlewareContext,
  type MurmurabaChunk,
  type MurmurabaConfig,
  type MurmurabaInstance,
  type MurmurabaMetrics,
  type MurmurabaResult,
  type OnChunkCallback,
  type PerformanceOptimization,
  type ProcessingMetrics,
  type ProcessingStatus,
  type RecordingConfig,
  RecordingError,
  type StreamingSusurroChunk,
  type SusurroChunk,
  type ToastService,
  TranscriptionError,
  type TranscriptionResult,
  type TranscriptionSegment,
  type UseLatencyMonitorReturn,
  type UseModelCacheReturn,
  type UseSusurroOptions,
  type UseSusurroReturn,
  type VADAnalysisResult,
  VADError,
  type VoiceSegment,
  WHISPER_CONFIG,
  type WhisperConfig,
  defaultAlertService,
  defaultToastService,
  getErrorMessage,
  handleAudioError,
  intentMiddleware,
  logAudioError,
  qualityMiddleware,
  retryWithBackoff,
  sentimentMiddleware,
  translationMiddleware,
  useErrorHandler,
  useLatencyMonitor,
  useModelCache,
  useSusurro,
};