UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

223 lines (222 loc) 7.04 kB
import { AbstractTTSClient } from "../core/abstract-tts";
import type { SpeakOptions, TTSCredentials, UnifiedVoice, WordBoundaryCallback } from "../types";

/**
 * Per-request synthesis options for the ElevenLabs engine.
 *
 * The fields seed, languageCode, previousText, nextText and
 * applyTextNormalization take effect only with the eleven_v3 model; every
 * other model silently ignores them.
 */
export interface ElevenLabsTTSOptions extends SpeakOptions {
    format?: "mp3" | "wav";
    useTimestamps?: boolean;
    model?: string;
    modelId?: string;
    outputFormat?: string;
    voiceSettings?: Record<string, unknown>;
    requestOptions?: Record<string, unknown>;
    // The remaining fields are the eleven_v3-only options described above.
    seed?: number;
    languageCode?: string;
    previousText?: string;
    nextText?: string;
    applyTextNormalization?: "auto" | "on" | "off";
}

/**
 * Credentials for the ElevenLabs TTS engine.
 */
export interface ElevenLabsCredentials extends TTSCredentials {
    /** API key issued by ElevenLabs. */
    apiKey?: string;
    /** Default model selection (optional). */
    model?: string;
    modelId?: string;
    /** Replaces the default output format, for example mp3_44100_128. */
    outputFormat?: string;
    /** Pass-through configuration, supplied as an object or as a JSON string. */
    properties?: Record<string, unknown> | string;
    propertiesJson?: string;
}

/**
 * Character-level alignment data returned by ElevenLabs.
 */
export interface ElevenLabsAlignment {
    characters: string[];
    character_start_times_seconds: number[];
    character_end_times_seconds: number[];
}

/**
 * Shape of the ElevenLabs API response when timestamps are requested.
 */
export interface ElevenLabsTimestampResponse {
    audio_base64: string;
    alignment: ElevenLabsAlignment;
    normalized_alignment?: ElevenLabsAlignment;
}

/**
 * TTS client for ElevenLabs.
 */
export declare class ElevenLabsTTSClient extends AbstractTTSClient {
    private static readonly MODEL_V3;
    private static readonly DEFAULT_MODEL;

    /** API key for ElevenLabs. */
    private apiKey;

    /** Base URL of the ElevenLabs API. */
    private baseUrl;

    /** Model used for synthesis by default. */
    private modelId;

    /** Output format used for requests by default. */
    private outputFormat;

    /** Request-level overrides supplied through credentials/properties. */
    private requestOverrides;

    /**
     * Creates an ElevenLabs TTS client.
     * @param credentials ElevenLabs credentials
     */
    constructor(credentials?: ElevenLabsCredentials);

    /** Applies configuration passed through the credentials (JSON strings included). */
    private applyCredentialProperties;

    /** Resolves which model ID a request uses. */
    private resolveModelId;

    /** Resolves which output format a request uses. */
    private resolveOutputFormat;

    /** Merges the default voice settings with any overriding ones. */
    private resolveVoiceSettings;

    /** Drops voice_settings from an overrides object so it is not merged twice. */
    private withoutVoiceSettings;

    /** Builds a request payload that honors both defaults and user overrides. */
    private buildRequestPayload;

    /** Sets the default model ID. */
    setModelId(modelId: string): void;

    /** Gets the value of a property. */
    getProperty(property: string): any;

    /** Sets the value of a property. */
    setProperty(property: string, value: any): void;

    /**
     * Checks whether the credentials are valid.
     * @returns Promise resolving to true when the credentials are valid, false otherwise
     */
    checkCredentials(): Promise<boolean>;

    /**
     * Runs a tiny synthesis so quota/Unauthorized problems are detected up-front.
     * Yields false when the quota is exceeded or the API key is not authorized
     * for synthesis.
     */
    private _quotaProbe;

    /**
     * Lists the credential types this engine requires.
     * @returns Array of required credential field names
     */
    protected getRequiredCredentials(): string[];

    /**
     * Combines raw voices with the resolved language data from the models
     * endpoint. Kept as its own method so tests can inject mock data directly.
     */
    protected _getVoicesWithModels(rawVoices: any[], models: any[]): any[];

    protected _getVoices(): Promise<any[]>;

    private static readonly AUDIO_TAG_REGEX;

    /**
     * Prepares text for synthesis by stripping SSML tags.
     * ElevenLabs does not support SSML; native [audio tags] provide v3
     * expressiveness instead.
     */
    private prepareText;

    /**
     * Handles audio tags ([laugh], [sigh], etc.) according to the model.
     * eleven_v3 supports audio tags natively, so they are passed through.
     * For every other model the audio tags are stripped.
     */
    private processAudioTags;

    /**
     * Converts text to audio bytes.
     * @param text Text to synthesize
     * @param options Synthesis options
     * @returns Promise resolving to audio bytes
     */
    synthToBytes(text: string, options?: ElevenLabsTTSOptions): Promise<Uint8Array>;

    /**
     * Synthesizes text to a byte stream.
     * @param text Text to synthesize
     * @param options Synthesis options
     * @returns Promise resolving to an object holding the audio stream and the word boundaries array
     */
    synthToBytestream(text: string, options?: ElevenLabsTTSOptions): Promise<{
        audioStream: ReadableStream<Uint8Array>;
        wordBoundaries: {
            text: string;
            offset: number;
            duration: number;
        }[];
    }>;

    /**
     * Calls the ElevenLabs API through its timestamps endpoint.
     * @param text Text to synthesize
     * @param voiceId Voice ID to use
     * @param options Synthesis options
     * @returns Promise resolving to timestamp response
     */
    private synthWithTimestamps;

    /**
     * Turns character-level timing data into word boundaries.
     * @param text Original text
     * @param alignment Character alignment data from ElevenLabs
     * @returns Array of word boundary objects
     */
    private convertCharacterTimingToWordBoundaries;

    /**
     * Starts playback and reports word boundaries through a callback.
     * @param text Text to speak
     * @param callback Callback function for word boundaries
     * @param options Synthesis options
     */
    startPlaybackWithCallbacks(text: string, callback: WordBoundaryCallback, options?: ElevenLabsTTSOptions): Promise<void>;

    /**
     * Maps ElevenLabs voice objects to the unified format.
     * @param rawVoices Array of ElevenLabs voice objects
     * @returns Promise resolving to an array of unified voice objects
     */
    protected _mapVoicesToUnified(rawVoices: any[]): Promise<UnifiedVoice[]>;

    /**
     * Looks up a voice by its ID.
     * @param voiceId Voice ID
     * @returns Promise resolving to voice details
     */
    getVoice(voiceId: string): Promise<UnifiedVoice | null>;

    /**
     * Converts MP3 audio data to WAV format with the audio converter utility.
     * @param mp3Data MP3 audio data from ElevenLabs
     * @returns WAV audio data
     */
    private convertMp3ToWav;
}