// js-tts-wrapper
// Version:
// A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
// 219 lines (218 loc) • 7.8 kB
// TypeScript
import { SSMLBuilder } from "../ssml/builder";
import type { CredentialsCheckResult, PropertyType, SimpleCallback, SpeakInput, SpeakOptions, TTSCredentials, TTSEventType, UnifiedVoice, WordBoundaryCallback } from "../types";
/**
* Abstract base class for all TTS clients.
*
* Declares the unified interface shared by every TTS provider. A concrete
* engine must implement the three abstract members: `_getVoices`,
* `synthToBytes` and `synthToBytestream`. Every other member is declared here
* without a body (this is an ambient declaration), so its runtime behavior is
* not visible from this file.
*/
export declare abstract class AbstractTTSClient {
/**
* Provider-specific credentials.
* Presumably the value passed to the constructor - confirm in the implementation.
*/
protected credentials: TTSCredentials;
/**
* Currently selected voice ID, or `null`.
* NOTE(review): `null` presumably means "no voice selected yet" - confirm.
*/
protected voiceId: string | null;
/**
* Currently selected language.
* NOTE(review): the expected code format is not stated here; `getVoicesByLanguage`
* documents BCP-47 for its own parameter - confirm whether the same applies.
*/
protected lang: string;
/**
* Event callbacks: a map from a string key to the list of callback functions
* stored under that key.
* Presumably keyed by event name and filled by `on` / `connect` - confirm.
*/
protected callbacks: Record<string, ((...args: any[]) => void)[]>;
/**
* SSML builder instance.
* Declared without an access modifier, so it is publicly accessible.
*/
ssml: SSMLBuilder;
/**
* Audio playback properties.
*/
protected audio: {
/** Playing flag (exact semantics are defined by the implementation). */
isPlaying: boolean;
/** Paused flag (exact semantics are defined by the implementation). */
isPaused: boolean;
/** Audio element associated with playback, or `null`. */
audioElement: HTMLAudioElement | null;
/** Playback position. NOTE(review): unit not stated here - confirm. */
position: number;
/** Audio duration. NOTE(review): unit not stated here - confirm. */
duration: number;
};
/**
* TTS properties (rate, pitch, volume), stored by property name.
* Presumably the store behind `getProperty` / `setProperty` - confirm.
*/
protected properties: Record<string, PropertyType>;
/**
* Word timings for the current audio, as `[number, number, string]` tuples.
* NOTE(review): presumably `[start, end, word]`; element meaning and time unit
* are not stated here - confirm.
*/
protected timings: Array<[number, number, string]>;
/**
* Audio sample rate in Hz
* This is used for playback and word timing estimation
* Default is 24000 Hz, but engines can override this
*/
protected sampleRate: number;
/**
* Creates a new TTS client
* @param credentials Provider-specific credentials
*/
constructor(credentials: TTSCredentials);
/**
* Get raw voices from the provider
* This is an internal method that should be implemented by each engine
* NOTE(review): described as returning "raw" voice objects, but the declared
* return type is `UnifiedVoice[]` - confirm which one is intended.
* @returns Promise resolving to an array of raw voice objects
*/
protected abstract _getVoices(): Promise<UnifiedVoice[]>;
/**
* Synthesize text to audio bytes
* Abstract: each engine supplies its own implementation.
* @param text Text or SSML to synthesize
* @param options Synthesis options (optional)
* @returns Promise resolving to audio bytes
*/
abstract synthToBytes(text: string, options?: SpeakOptions): Promise<Uint8Array>;
/**
* Synthesize text to a byte stream and optionally provide word boundaries.
* Abstract: each engine supplies its own implementation.
* Each word boundary entry carries `text`, `offset` and `duration`.
* NOTE(review): the unit of `offset` / `duration` is not stated here - confirm.
* @param text Text or SSML to synthesize
* @param options Synthesis options (optional)
* @returns Promise resolving to an object containing the audio stream and an array of word boundaries.
* The wordBoundaries array will be empty if the engine does not support them.
*/
abstract synthToBytestream(text: string, options?: SpeakOptions): Promise<{
audioStream: ReadableStream<Uint8Array>;
wordBoundaries: Array<{
text: string;
offset: number;
duration: number;
}>;
}>;
/**
* Get available voices from the provider with normalized language codes
* @returns Promise resolving to an array of unified voice objects
*/
getVoices(): Promise<UnifiedVoice[]>;
/**
* Map provider-specific voice objects to unified format
* The input is typed `any[]`, so its element shape is not checked by the compiler.
* @param rawVoices Array of provider-specific voice objects
* @returns Promise resolving to an array of partially unified voice objects
*/
protected _mapVoicesToUnified(rawVoices: any[]): Promise<UnifiedVoice[]>;
/**
* Speak text using the default audio output, or play audio from file/bytes/stream
* @param input Text to speak, or audio input (filename, audioBytes, or audioStream)
* @param options Synthesis options (optional)
* @returns Promise resolving when audio playback starts
*/
speak(input: string | SpeakInput, options?: SpeakOptions): Promise<void>;
/**
* Speak text using streaming synthesis, or play audio from file/bytes/stream
* Takes the same parameters and has the same return type as `speak`.
* @param input Text to speak, or audio input (filename, audioBytes, or audioStream)
* @param options Synthesis options (optional)
* @returns Promise resolving when audio playback starts
*/
speakStreamed(input: string | SpeakInput, options?: SpeakOptions): Promise<void>;
/**
* Synthesize text to audio and save it to a file (browser download)
* @param text Text or SSML to synthesize
* @param filename Filename to save as
* @param format Audio format, "mp3" or "wav" (optional; the default is not visible in this declaration)
* @param options Synthesis options (optional)
* @returns Promise with no resolved value
*/
synthToFile(text: string, filename: string, format?: "mp3" | "wav", options?: SpeakOptions): Promise<void>;
/**
* Set the voice to use for synthesis
* @param voiceId Voice ID to use
* @param lang Language code (optional)
*/
setVoice(voiceId: string, lang?: string): void;
/**
* Pause audio playback
*/
pause(): void;
/**
* Resume audio playback
*/
resume(): void;
/**
* Stop audio playback
*/
stop(): void;
/**
* Create estimated word timings for non-streaming engines
* Returns nothing; presumably the result is stored in `timings` - confirm.
* @param text Text to create timings for
*/
protected _createEstimatedWordTimings(text: string): void;
/**
* Fire word boundary callbacks based on timing data
*/
protected _fireWordBoundaryCallbacks(): void;
/**
* Schedule word boundary callbacks based on timing information
* This is used when we have audio playback but need to schedule callbacks
*/
protected _scheduleWordBoundaryCallbacks(): void;
/**
* Check if text is SSML
* NOTE(review): the detection rule is not visible in this declaration.
* @param text Text to check
* @returns True if text is SSML
*/
protected _isSSML(text: string): boolean;
/**
* Strip SSML tags from text
* @param ssml SSML text
* @returns Plain text without SSML tags
*/
protected _stripSSML(ssml: string): string;
/**
* Register a callback for an event
* @param event Event type (restricted to `TTSEventType`)
* @param fn Callback function
*/
on(event: TTSEventType, fn: (...args: any[]) => void): void;
/**
* Emit an event to all registered callbacks
* NOTE(review): `event` is typed as a plain `string` here, while `on` accepts
* only `TTSEventType` - confirm whether the wider type is intentional.
* @param event Event type
* @param args Event arguments
*/
protected emit(event: string, ...args: any[]): void;
/**
* Start playback with word boundary callbacks
* @param text Text or SSML to speak
* @param callback Callback function for word boundaries
* @param options Synthesis options (optional)
* @returns Promise with no resolved value
*/
startPlaybackWithCallbacks(text: string, callback: WordBoundaryCallback, options?: SpeakOptions): Promise<void>;
/**
* Connect a callback to an event
* Only the event names "onStart" and "onEnd" are accepted by this signature.
* @param event Event name
* @param callback Callback function
*/
connect(event: "onStart" | "onEnd", callback: SimpleCallback): void;
/**
* Get the value of a property
* NOTE(review): the return type has no `undefined`; what is returned for an
* unknown property name is not visible here - confirm.
* @param propertyName Property name
* @returns Property value
*/
getProperty(propertyName: string): PropertyType;
/**
* Set a property value
* @param propertyName Property name
* @param value Property value
*/
setProperty(propertyName: string, value: PropertyType): void;
/**
* Create a prosody tag with the current properties
* @param text Text to wrap with prosody
* @returns Text with prosody tag
*/
constructProsodyTag(text: string): string;
/**
* Check if credentials are valid
* @returns Promise resolving to true if credentials are valid
*/
checkCredentials(): Promise<boolean>;
/**
* Check if credentials are valid with detailed response
* The result shape is defined by `CredentialsCheckResult`, imported from "../types".
* @returns Promise resolving to an object with success flag and optional error message
*/
checkCredentialsDetailed(): Promise<CredentialsCheckResult>;
/**
* Get available voices for a specific language
* @param language Language code (BCP-47 format, e.g., 'en-US')
* @returns Promise resolving to an array of available voices for the specified language
*/
getVoicesByLanguage(language: string): Promise<UnifiedVoice[]>;
}