// js-tts-wrapper
// Version: (not captured in this listing)
// A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services.
// File listing: 223 lines (222 loc) • 7.04 kB
// Language: TypeScript
import { AbstractTTSClient } from "../core/abstract-tts";
import type { SpeakOptions, TTSCredentials, UnifiedVoice, WordBoundaryCallback } from "../types";
/**
* Extended per-call options for ElevenLabs TTS, layered on top of the shared
* SpeakOptions.
*
* seed, languageCode, previousText, nextText, and applyTextNormalization are
* only honoured by the eleven_v3 model and are silently ignored by others.
*
* NOTE(review): both `model` and `modelId` are accepted; which one wins when
* both are set is not visible in this declaration file — confirm in the
* implementation.
*/
export interface ElevenLabsTTSOptions extends SpeakOptions {
/** Requested audio container; the type only admits "mp3" or "wav". */
format?: "mp3" | "wav";
/**
* Opt in to timing data.
* NOTE(review): presumably routes the call through the timestamps endpoint — confirm.
*/
useTimestamps?: boolean;
/** Per-call model selection (alternate spelling of `modelId`). */
model?: string;
/** Per-call model selection (alternate spelling of `model`). */
modelId?: string;
/** Per-call output format string; accepted values are not constrained by this type. */
outputFormat?: string;
/**
* Voice settings for this call.
* NOTE(review): presumably merged over the client defaults — confirm.
*/
voiceSettings?: Record<string, unknown>;
/**
* Free-form request-level overrides for this call.
* NOTE(review): how these are merged into the request payload is not visible here.
*/
requestOptions?: Record<string, unknown>;
/** eleven_v3 only. */
seed?: number;
/** eleven_v3 only. */
languageCode?: string;
/** eleven_v3 only. */
previousText?: string;
/** eleven_v3 only. */
nextText?: string;
/** eleven_v3 only; one of "auto", "on" or "off". */
applyTextNormalization?: "auto" | "on" | "off";
}
/**
* ElevenLabs TTS credentials.
*
* Besides the API key, this object can carry client-wide defaults (model,
* output format) and pass-through configuration. Every field is optional at
* the type level.
*/
export interface ElevenLabsCredentials extends TTSCredentials {
/**
* ElevenLabs API key
*/
apiKey?: string;
/**
* Optional default model selection
*/
model?: string;
/**
* Optional default model selection (alternate spelling of `model`).
* NOTE(review): precedence between `model` and `modelId` is not visible in
* this declaration file — confirm in the implementation.
*/
modelId?: string;
/**
* Override default output format (e.g. mp3_44100_128)
*/
outputFormat?: string;
/**
* Pass-through configuration as object or JSON string
*/
properties?: Record<string, unknown> | string;
/**
* Pass-through configuration supplied as a string.
* NOTE(review): presumably a JSON-encoded equivalent of `properties`; how the
* two are combined when both are set is not visible here — confirm.
*/
propertiesJson?: string;
}
/**
* ElevenLabs character alignment data.
*
* NOTE(review): the three arrays are presumably parallel (index i of each
* describes the same character) — the type does not enforce equal lengths, so
* confirm before indexing them together.
*/
export interface ElevenLabsAlignment {
/** Characters covered by the alignment. */
characters: string[];
/** Start time of each character, in seconds (per the field name). */
character_start_times_seconds: number[];
/** End time of each character, in seconds (per the field name). */
character_end_times_seconds: number[];
}
/**
* ElevenLabs API response with timestamps: the audio payload together with
* character-level alignment data.
*/
export interface ElevenLabsTimestampResponse {
/** Audio payload, base64-encoded (per the field name). */
audio_base64: string;
/** Character alignment for the audio. */
alignment: ElevenLabsAlignment;
/**
* Optional second alignment.
* NOTE(review): presumably computed against the normalized form of the input
* text — confirm against the API reference.
*/
normalized_alignment?: ElevenLabsAlignment;
}
/**
* ElevenLabs TTS client.
*
* Ambient declaration of the ElevenLabs engine: it extends AbstractTTSClient
* and exposes synthesis (bytes and byte stream), playback with word boundary
* callbacks, voice lookup and credential checking. Only signatures are
* declared here; behaviour lives in the implementation file.
*/
export declare class ElevenLabsTTSClient extends AbstractTTSClient {
/**
* Constant for the v3 model.
* NOTE(review): value is not visible in this declaration file; presumably the
* "eleven_v3" identifier — confirm.
*/
private static readonly MODEL_V3;
/**
* Constant for the default model.
* NOTE(review): value is not visible in this declaration file.
*/
private static readonly DEFAULT_MODEL;
/**
* ElevenLabs API key
*/
private apiKey;
/**
* Base URL for ElevenLabs API
*/
private baseUrl;
/**
* Default model to use for synthesis
*/
private modelId;
/**
* Default output format for requests
*/
private outputFormat;
/**
* Request-level overrides provided via credentials/properties
*/
private requestOverrides;
/**
* Create a new ElevenLabs TTS client
* @param credentials ElevenLabs credentials; the parameter itself is optional
*/
constructor(credentials?: ElevenLabsCredentials);
/**
* Apply any configuration passed through credentials (including JSON strings)
*/
private applyCredentialProperties;
/**
* Resolve the model ID for a request
*/
private resolveModelId;
/**
* Resolve the output format for a request
*/
private resolveOutputFormat;
/**
* Merge default and override voice settings
*/
private resolveVoiceSettings;
/**
* Remove voice_settings from an overrides object to avoid double-merging
*/
private withoutVoiceSettings;
/**
* Build a request payload honoring defaults and user overrides
*/
private buildRequestPayload;
/**
* Set default model ID
* @param modelId Model ID to use as the default
*/
setModelId(modelId: string): void;
/**
* Get a property value
* @param property Property name
* @returns The property value; untyped (`any`), so callers must narrow it themselves
*/
getProperty(property: string): any;
/**
* Set a property value
* @param property Property name
* @param value New value; untyped (`any`), the set of recognised properties is not visible here
*/
setProperty(property: string, value: any): void;
/**
* Check if the credentials are valid
* @returns Promise resolving to true if credentials are valid, false otherwise
*/
checkCredentials(): Promise<boolean>;
/**
* Perform a tiny synthesis to detect quota/Unauthorized issues up-front
* Returns false if quota is exceeded or API key is unauthorized for synthesis
*/
private _quotaProbe;
/**
* Get the list of required credential types for this engine
* @returns Array of required credential field names
*/
protected getRequiredCredentials(): string[];
/**
* Merge raw voices with resolved language data from the models endpoint.
* Extracted as a separate method so tests can inject mock data directly.
* @param rawVoices Raw voice objects (untyped)
* @param models Raw model objects (untyped)
* @returns Array of merged voice objects (untyped)
*/
protected _getVoicesWithModels(rawVoices: any[], models: any[]): any[];
/**
* Get the raw voice objects for this engine.
* NOTE(review): presumably fetches voices and models from the API and combines
* them via _getVoicesWithModels — confirm in the implementation.
* @returns Promise resolving to an array of raw voice objects (untyped)
*/
protected _getVoices(): Promise<any[]>;
/**
* Constant for audio tag matching.
* NOTE(review): the pattern is not visible in this declaration file;
* presumably matches bracketed tags such as [laugh] or [sigh] — confirm.
*/
private static readonly AUDIO_TAG_REGEX;
/**
* Prepare text for synthesis by stripping SSML tags.
* ElevenLabs does not support SSML — use native [audio tags] for v3 expressiveness.
*/
private prepareText;
/**
* Process audio tags ([laugh], [sigh], etc.) based on the model.
* eleven_v3 natively supports audio tags — pass them through.
* For all other models, strip audio tags.
*/
private processAudioTags;
/**
* Convert text to audio bytes
* @param text Text to synthesize
* @param options Synthesis options
* @returns Promise resolving to audio bytes
*/
synthToBytes(text: string, options?: ElevenLabsTTSOptions): Promise<Uint8Array>;
/**
* Synthesize text to a byte stream
* @param text Text to synthesize
* @param options Synthesis options
* @returns Promise resolving to an object containing the audio stream and word boundaries array
* (each boundary carries `text`, `offset` and `duration`; units are not stated in this declaration)
*/
synthToBytestream(text: string, options?: ElevenLabsTTSOptions): Promise<{
audioStream: ReadableStream<Uint8Array>;
wordBoundaries: Array<{
text: string;
offset: number;
duration: number;
}>;
}>;
/**
* Call ElevenLabs API with timestamps endpoint
* @param text Text to synthesize
* @param voiceId Voice ID to use
* @param options Synthesis options
* @returns Promise resolving to timestamp response
*/
private synthWithTimestamps;
/**
* Convert character-level timing data to word boundaries
* @param text Original text
* @param alignment Character alignment data from ElevenLabs
* @returns Array of word boundary objects
*/
private convertCharacterTimingToWordBoundaries;
/**
* Start playback with word boundary callbacks
* @param text Text to speak
* @param callback Callback function for word boundaries
* @param options Synthesis options
* @returns Promise with no value; when it settles relative to playback is not visible in this declaration
*/
startPlaybackWithCallbacks(text: string, callback: WordBoundaryCallback, options?: ElevenLabsTTSOptions): Promise<void>;
/**
* Map ElevenLabs voice objects to unified format
* @param rawVoices Array of ElevenLabs voice objects
* @returns Promise resolving to an array of unified voice objects
*/
protected _mapVoicesToUnified(rawVoices: any[]): Promise<UnifiedVoice[]>;
/**
* Get voice by ID
* @param voiceId Voice ID
* @returns Promise resolving to voice details, or null (the conditions that yield null are not visible here)
*/
getVoice(voiceId: string): Promise<UnifiedVoice | null>;
/**
* Convert MP3 audio data to WAV format using the audio converter utility
* @param mp3Data MP3 audio data from ElevenLabs
* @returns WAV audio data
*/
private convertMp3ToWav;
}