js-tts-wrapper

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

import { AbstractTTSClient } from "../core/abstract-tts";
import type { SpeakOptions, TTSCredentials, UnifiedVoice } from "../types";
import { type WordBoundary } from "../utils/word-timing-estimator";
/**
 * OpenAI TTS Client Credentials
 */
/**
 * Extended options for OpenAI TTS
 */
export interface OpenAITTSOptions extends SpeakOptions {
    /** OpenAI Model */
    model?: string;
    /** OpenAI Voice */
    voice?: string;
    /** OpenAI Speed (maps to rate) */
    speed?: number;
    /** Output format */
    format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';
    /**
     * Output directory for audio files
     */
    outputDir?: string;
    /**
     * Output file name
     */
    outputFile?: string;
    /**
     * Callback for word boundary events
     */
    onWord?: (wordBoundary: WordBoundary) => void;
    /**
     * Whether to return word boundaries
     */
    returnWordBoundaries?: boolean;
    /**
     * Callback for end of speech event
     */
    onEnd?: () => void;
}
/**
 * OpenAI TTS Client Credentials
 */
export interface OpenAITTSCredentials extends TTSCredentials {
    /** OpenAI API Key */
    apiKey?: string;
    /** Base URL for OpenAI API */
    baseURL?: string;
    /** Organization ID */
    organization?: string;
}
/**
 * OpenAI TTS Client
 *
 * This client uses the OpenAI API to convert text to speech.
 * It supports streaming audio but does not support SSML.
 * Word boundaries are estimated since OpenAI doesn't provide word events.
 */
export declare class OpenAITTSClient extends AbstractTTSClient {
    private client;
    private clientLoadingPromise;
    protected credentials: OpenAITTSCredentials;
    private model;
    private voice;
    private instructions;
    private responseFormat;
    private lastWordBoundaries;
    /**
     * Get the last word boundaries
     * @returns Array of word boundary objects
     */
    getLastWordBoundaries(): WordBoundary[];
    /**
     * Set the last word boundaries
     * @param wordBoundaries Array of word boundary objects
     */
    setLastWordBoundaries(wordBoundaries: WordBoundary[]): void;
    /**
     * Create a new OpenAI TTS Client
     * @param credentials OpenAI API credentials
     */
    constructor(credentials?: OpenAITTSCredentials);
    /**
     * Load the OpenAI SDK dynamically.
     * Returns the initialized client (real or mock).
     */
    private loadClient;
    /**
     * Check if the credentials are valid
     * @returns Promise resolving to true if credentials are valid, false otherwise
     */
    checkCredentials(): Promise<boolean>;
    /**
     * Get available voices
     * @returns Promise resolving to an array of unified voice objects
     */
    protected _getVoices(): Promise<UnifiedVoice[]>;
    /**
     * Map OpenAI voice objects to unified format
     * @param rawVoices Array of OpenAI voice objects
     * @returns Promise resolving to an array of unified voice objects
     */
    protected _mapVoicesToUnified(rawVoices: any[]): Promise<UnifiedVoice[]>;
    /**
     * Set the voice to use for synthesis
     * @param voiceId Voice ID to use
     */
    setVoice(voiceId: string): void;
    /**
     * Set the model to use for synthesis
     * @param model Model ID to use
     */
    setModel(model: string): void;
    /**
     * Set instructions for the TTS engine
     * @param instructions Instructions for the TTS engine
     */
    setInstructions(instructions: string): void;
    /**
     * Set the response format
     * @param format Response format (mp3, opus, aac, flac, wav, pcm)
     */
    setResponseFormat(format: string): void;
    /**
     * Get a property value
     * @param property Property name
     * @returns Property value
     */
    getProperty(property: string): any;
    /**
     * Set a property value
     * @param property Property name
     * @param value Property value
     */
    setProperty(property: string, value: any): void;
    /**
     * Convert text to speech
     * @param text Text to convert to speech
     * @param options TTS options
     * @returns Promise resolving to the path of the generated audio file
     */
    textToSpeech(text: string, options?: OpenAITTSOptions): Promise<string>;
    /**
     * Convert text to speech with streaming
     * @param text Text to convert to speech
     * @param options TTS options
     * @returns Promise resolving to the path of the generated audio file
     */
    textToSpeechStreaming(text: string, options?: OpenAITTSOptions): Promise<string>;
    /**
     * Convert SSML to speech (not supported by OpenAI)
     * @param ssml SSML to convert to speech
     * @param options TTS options
     * @returns Promise resolving to the path of the generated audio file
     */
    ssmlToSpeech(_ssml: string, _options?: OpenAITTSOptions): Promise<string>;
    /**
     * Convert SSML to speech with streaming (not supported by OpenAI)
     * @param ssml SSML to convert to speech
     * @param options TTS options
     * @returns Promise resolving to the path of the generated audio file
     */
    ssmlToSpeechStreaming(_ssml: string, _options?: OpenAITTSOptions): Promise<string>;
    /**
     * Synthesize text to audio bytes
     * @param text Text to synthesize
     * @param options Synthesis options
     * @returns Promise resolving to audio bytes
     */
    synthToBytes(text: string | string[], options?: OpenAITTSOptions): Promise<Uint8Array>;
    /**
     * Synthesize text to a byte stream using OpenAI API.
     * @param text Text to synthesize.
     * @param _options Synthesis options (currently unused for streaming, uses defaults).
     * @returns Promise resolving to an object containing the audio stream and an empty word boundaries array.
     */
    synthToBytestream(text: string, _options?: SpeakOptions): Promise<{
        audioStream: ReadableStream<Uint8Array>;
        wordBoundaries: Array<{
            text: string;
            offset: number;
            duration: number;
        }>;
    }>;
}
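
The declarations above map to a straightforward workflow: construct the client with an API key, optionally pick a model, voice, and response format, then call one of the synthesis methods. The sketch below illustrates that flow against the signatures declared in this file. The root import path, the "tts-1" model ID, the "alloy" voice ID, and reading the key from process.env are assumptions for illustration only; none of them are specified by this declaration file.

// Usage sketch based only on the method signatures declared above.
// Assumptions (not guaranteed by this file): OpenAITTSClient is re-exported
// from the package root, and "tts-1" / "alloy" are accepted model/voice IDs.
import { OpenAITTSClient } from "js-tts-wrapper";

async function demo(): Promise<void> {
  // Credentials follow the OpenAITTSCredentials interface; every field is optional.
  const tts = new OpenAITTSClient({ apiKey: process.env.OPENAI_API_KEY });

  // Validate the key before synthesizing anything.
  if (!(await tts.checkCredentials())) {
    throw new Error("OpenAI credentials are invalid");
  }

  tts.setModel("tts-1");        // assumed model ID
  tts.setVoice("alloy");        // assumed voice ID
  tts.setResponseFormat("mp3"); // mp3 | opus | aac | flac | wav | pcm

  // File-based synthesis: resolves to the path of the generated audio file.
  // Word boundaries are estimated, so onWord fires from the estimator.
  const filePath = await tts.textToSpeech("Hello from js-tts-wrapper!", {
    outputFile: "hello.mp3",
    returnWordBoundaries: true,
    onWord: (wb) => console.log("word boundary:", wb),
  });
  console.log("audio written to", filePath);

  // In-memory synthesis: resolves to the raw audio bytes.
  const bytes = await tts.synthToBytes("Hello again!");
  console.log("got", bytes.length, "bytes");

  // Streaming synthesis: a web ReadableStream plus a word-boundary array
  // (documented above as empty for this engine).
  const { audioStream } = await tts.synthToBytestream("Streaming hello!");
  const reader = audioStream.getReader();
  let streamed = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    streamed += value.length;
  }
  console.log("received", streamed, "streamed bytes");
}

demo().catch(console.error);

Note that ssmlToSpeech and ssmlToSpeechStreaming are declared but, per the comments above, SSML is not supported by this engine, so plain-text input is the expected path.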