js-tts-wrapper
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
import { AbstractTTSClient } from "../core/abstract-tts";
import type { SpeakOptions, TTSCredentials, UnifiedVoice } from "../types";
import { type WordBoundary } from "../utils/word-timing-estimator";
/**
* Extended options for OpenAI TTS
*/
export interface OpenAITTSOptions extends SpeakOptions {
/** OpenAI Model */
model?: string;
/** OpenAI Voice */
voice?: string;
/** OpenAI Speed (maps to rate) */
speed?: number;
/** Output format */
format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm';
/**
* Output directory for audio files
*/
outputDir?: string;
/**
* Output file name
*/
outputFile?: string;
/**
* Callback for word boundary events
*/
onWord?: (wordBoundary: WordBoundary) => void;
/**
* Whether to return word boundaries
*/
returnWordBoundaries?: boolean;
/**
* Callback for end of speech event
*/
onEnd?: () => void;
}
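/*
 * A minimal sketch of an options object built against the interface above.
 * The "js-tts-wrapper" import path assumes the package root re-exports this
 * type, which is not confirmed by this file; "tts-1" and "alloy" are
 * illustrative OpenAI model and voice ids, not defaults documented here.
 *
 *   import type { OpenAITTSOptions } from "js-tts-wrapper";
 *
 *   const options: OpenAITTSOptions = {
 *     model: "tts-1",
 *     voice: "alloy",
 *     speed: 1.0,
 *     format: "mp3",
 *     returnWordBoundaries: true,
 *     onWord: (wb) => console.log("word boundary:", wb),
 *     onEnd: () => console.log("speech finished"),
 *   };
 */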
/**
* OpenAI TTS Client Credentials
*/
export interface OpenAITTSCredentials extends TTSCredentials {
/** OpenAI API Key */
apiKey?: string;
/** Base URL for OpenAI API */
baseURL?: string;
/** Organization ID */
organization?: string;
}
/**
* OpenAI TTS Client
*
* This client uses the OpenAI API to convert text to speech.
* It supports streaming audio but does not support SSML.
* Word boundaries are estimated since OpenAI doesn't provide word events.
*/
export declare class OpenAITTSClient extends AbstractTTSClient {
private client;
private clientLoadingPromise;
protected credentials: OpenAITTSCredentials;
private model;
private voice;
private instructions;
private responseFormat;
private lastWordBoundaries;
/**
* Get the last word boundaries
* @returns Array of word boundary objects
*/
getLastWordBoundaries(): WordBoundary[];
/**
* Set the last word boundaries
* @param wordBoundaries Array of word boundary objects
*/
setLastWordBoundaries(wordBoundaries: WordBoundary[]): void;
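/*
 * Sketch: reading back the estimated word boundaries after a synthesis call
 * (per the class-level note above, OpenAI does not emit word events, so these
 * are estimates). Assumes `client` is an instance of this class; the exact
 * shape of WordBoundary is defined in word-timing-estimator, not in this file.
 *
 *   await client.textToSpeech("Hello world", { returnWordBoundaries: true });
 *   for (const wb of client.getLastWordBoundaries()) {
 *     console.log(wb);
 *   }
 */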
/**
* Create a new OpenAI TTS Client
* @param credentials OpenAI API credentials
*/
constructor(credentials?: OpenAITTSCredentials);
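/*
 * Sketch: constructing the client. The "js-tts-wrapper" import path assumes
 * the package root re-exports OpenAITTSClient, which is not confirmed by this
 * file; the credential fields come from OpenAITTSCredentials above.
 *
 *   import { OpenAITTSClient } from "js-tts-wrapper";
 *
 *   const client = new OpenAITTSClient({
 *     apiKey: process.env.OPENAI_API_KEY,
 *     // baseURL and organization are optional; omit them to use the defaults
 *   });
 */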
/**
* Load the OpenAI SDK dynamically.
* Returns the initialized client (real or mock).
*/
private loadClient;
/**
* Check if the credentials are valid
* @returns Promise resolving to true if credentials are valid, false otherwise
*/
checkCredentials(): Promise<boolean>;
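/*
 * Sketch: verifying credentials before synthesizing, using the client
 * constructed above.
 *
 *   if (!(await client.checkCredentials())) {
 *     throw new Error("OpenAI credentials are missing or invalid");
 *   }
 */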
/**
* Get available voices
* @returns Promise resolving to an array of unified voice objects
*/
protected _getVoices(): Promise<UnifiedVoice[]>;
/**
* Map OpenAI voice objects to unified format
* @param rawVoices Array of OpenAI voice objects
* @returns Promise resolving to an array of unified voice objects
*/
protected _mapVoicesToUnified(rawVoices: any[]): Promise<UnifiedVoice[]>;
/**
* Set the voice to use for synthesis
* @param voiceId Voice ID to use
*/
setVoice(voiceId: string): void;
/**
* Set the model to use for synthesis
* @param model Model ID to use
*/
setModel(model: string): void;
/**
* Set instructions for the TTS engine
* @param instructions Instructions for the TTS engine
*/
setInstructions(instructions: string): void;
/**
* Set the response format
* @param format Response format (mp3, opus, aac, flac, wav, pcm)
*/
setResponseFormat(format: string): void;
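/*
 * Sketch: configuring the client with the setters above. The model, voice,
 * instruction, and format values are illustrative only; this file does not
 * document which values the engine accepts or uses by default.
 *
 *   client.setModel("gpt-4o-mini-tts");
 *   client.setVoice("nova");
 *   client.setInstructions("Speak slowly and clearly.");
 *   client.setResponseFormat("wav");
 */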
/**
* Get a property value
* @param property Property name
* @returns Property value
*/
getProperty(property: string): any;
/**
* Set a property value
* @param property Property name
* @param value Property value
*/
setProperty(property: string, value: any): void;
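/*
 * Sketch: the generic property accessors above. The set of recognized
 * property names is not listed in this file, so "voice" is only a guess used
 * for illustration.
 *
 *   client.setProperty("voice", "alloy");
 *   console.log(client.getProperty("voice"));
 */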
/**
* Convert text to speech
* @param text Text to convert to speech
* @param options TTS options
* @returns Promise resolving to the path of the generated audio file
*/
textToSpeech(text: string, options?: OpenAITTSOptions): Promise<string>;
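/*
 * Sketch: plain-text synthesis to a file. Per the signature above, the
 * promise resolves to the path of the generated audio file; the outputDir and
 * outputFile values are illustrative.
 *
 *   const filePath = await client.textToSpeech("Hello from js-tts-wrapper!", {
 *     format: "mp3",
 *     outputDir: "./audio",
 *     outputFile: "greeting.mp3",
 *   });
 *   console.log("audio written to", filePath);
 */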
/**
* Convert text to speech with streaming
* @param text Text to convert to speech
* @param options TTS options
* @returns Promise resolving to the path of the generated audio file
*/
textToSpeechStreaming(text: string, options?: OpenAITTSOptions): Promise<string>;
/**
* Convert SSML to speech (not supported by OpenAI)
* @param ssml SSML to convert to speech
* @param options TTS options
* @returns Promise resolving to the path of the generated audio file
*/
ssmlToSpeech(_ssml: string, _options?: OpenAITTSOptions): Promise<string>;
/**
* Convert SSML to speech with streaming (not supported by OpenAI)
* @param ssml SSML to convert to speech
* @param options TTS options
* @returns Promise resolving to the path of the generated audio file
*/
ssmlToSpeechStreaming(_ssml: string, _options?: OpenAITTSOptions): Promise<string>;
/**
* Synthesize text to audio bytes
* @param text Text to synthesize
* @param options Synthesis options
* @returns Promise resolving to audio bytes
*/
synthToBytes(text: string | string[], options?: OpenAITTSOptions): Promise<Uint8Array>;
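/*
 * Sketch: in-memory synthesis with synthToBytes, then writing the bytes out
 * with Node's fs module. The Node usage is an assumption for illustration; the
 * method itself only promises a Uint8Array.
 *
 *   import { writeFile } from "node:fs/promises";
 *
 *   const bytes = await client.synthToBytes("Hello world", { format: "wav" });
 *   await writeFile("hello.wav", bytes);
 */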
/**
* Synthesize text to a byte stream using OpenAI API.
* @param text Text to synthesize.
* @param _options Synthesis options (currently unused for streaming, uses defaults).
* @returns Promise resolving to an object containing the audio stream and an empty word boundaries array.
*/
synthToBytestream(text: string, _options?: SpeakOptions): Promise<{
audioStream: ReadableStream<Uint8Array>;
wordBoundaries: Array<{
text: string;
offset: number;
duration: number;
}>;
}>;
}
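/*
 * Sketch: consuming the byte stream from synthToBytestream. Per the JSDoc
 * above, options are currently ignored for streaming and the wordBoundaries
 * array comes back empty; the chunk collection below is ordinary web-streams
 * usage, not a library API.
 *
 *   const { audioStream } = await client.synthToBytestream("Streaming hello");
 *   const reader = audioStream.getReader();
 *   const chunks: Uint8Array[] = [];
 *   for (;;) {
 *     const { done, value } = await reader.read();
 *     if (done) break;
 *     chunks.push(value);
 *   }
 *   const totalBytes = chunks.reduce((n, c) => n + c.length, 0);
 *   console.log(`received ${totalBytes} bytes of audio`);
 */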