edge-tts-universal
Version:
Universal text-to-speech library using Microsoft Edge's online TTS service. Works in Node.js and browsers WITHOUT needing Microsoft Edge, Windows, or an API key
316 lines (310 loc) • 10.1 kB
TypeScript
import { T as TTSChunk, V as Voice, a as VoicesManagerFind, b as VoicesManagerVoice } from './exceptions-C4rAyGjr.js';
export { C as CommunicateState, E as EdgeTTSException, F as FetchError, I as IsomorphicCommunicate, c as IsomorphicCommunicateOptions, e as IsomorphicDRM, d as IsomorphicVoicesManager, N as NoAudioReceived, f as SkewAdjustmentError, S as SubMaker, g as UnexpectedResponse, U as UnknownResponse, h as ValueError, i as VoiceTag, W as WebSocketError, l as listVoicesIsomorphic } from './exceptions-C4rAyGjr.js';
/**
* Configuration options for the Communicate class.
*
* Passed as the optional second argument of the `Communicate` constructor.
* Every field is optional; the defaults applied when a field is omitted are
* not visible in this declaration file.
*/
interface CommunicateOptions {
/** Voice to use for synthesis (e.g., "en-US-EmmaMultilingualNeural") */
voice?: string;
/** Speech rate adjustment, written as a signed percentage string (e.g., "+20%", "-10%") */
rate?: string;
/** Volume level adjustment, written as a signed percentage string (e.g., "+50%", "-25%") */
volume?: string;
/** Pitch adjustment in Hz, written as a signed string with unit (e.g., "+5Hz", "-10Hz") */
pitch?: string;
/** Proxy URL for requests */
proxy?: string;
/** WebSocket connection timeout in milliseconds */
connectionTimeout?: number;
}
/**
* Main class for text-to-speech synthesis using Microsoft Edge's online TTS service.
*
* An instance is built from the text to speak plus optional settings, and the
* result is consumed through the async generator returned by `stream()`.
*
* @example
* ```typescript
* const communicate = new Communicate('Hello, world!', {
* voice: 'en-US-EmmaMultilingualNeural',
* });
*
* for await (const chunk of communicate.stream()) {
* if (chunk.type === 'audio' && chunk.data) {
* // Handle audio data
* }
* }
* ```
*/
declare class Communicate {
// Private members: the declaration output lists them by name only (their
// types are omitted), so nothing about their shape can be read from here.
private readonly ttsConfig;
private readonly texts;
private readonly proxy?;
private readonly connectionTimeout?;
private state;
/**
* Creates a new Communicate instance for text-to-speech synthesis.
*
* @param text - The text to synthesize
* @param options - Optional configuration for synthesis (voice, prosody,
* proxy, connection timeout); see `CommunicateOptions`
*/
constructor(text: string, options?: CommunicateOptions);
// Private helpers; signatures are not part of the emitted declaration.
private parseMetadata;
private _stream;
/**
* Streams text-to-speech synthesis results.
*
* Returns an async generator that yields audio chunks and word boundary events.
* Can only be called once per Communicate instance.
*
* @returns An async generator of `TTSChunk` values that completes without a
* return value
* @yields TTSChunk - Audio data or word boundary information
* @throws {Error} If called more than once
* @throws {NoAudioReceived} If no audio data is received
* @throws {WebSocketError} If WebSocket connection fails
*
* @example
* ```typescript
* for await (const chunk of communicate.stream()) {
* if (chunk.type === 'audio') {
* // Process audio data
* } else if (chunk.type === 'WordBoundary') {
* // Process subtitle timing
* }
* }
* ```
*/
stream(): AsyncGenerator<TTSChunk, void, unknown>;
}
/**
* Fetches all available voices from the Microsoft Edge TTS service.
*
* @param proxy - Optional proxy URL for the request; may be omitted
* @returns Promise resolving to an array of `Voice` entries, one per available voice
*/
declare function listVoices(proxy?: string): Promise<Voice[]>;
/**
* Utility class for finding and filtering available voices.
*
* Instances are meant to be obtained through the asynchronous static
* `create()` factory; `find()` is documented to throw when `create()` was
* not called first.
*
* @example
* ```typescript
* const voicesManager = await VoicesManager.create();
* const englishVoices = voicesManager.find({ Language: 'en' });
* ```
*/
declare class VoicesManager {
// Private state; the declaration output omits the types of private members.
private voices;
private calledCreate;
/**
* Creates a new VoicesManager instance.
*
* @param customVoices - Optional custom voice list instead of fetching from API
* @param proxy - Optional proxy URL for API requests
* @returns Promise resolving to VoicesManager instance
*/
static create(customVoices?: Voice[], proxy?: string): Promise<VoicesManager>;
/**
* Finds voices matching the specified criteria.
*
* @param filter - Filter criteria for voice selection (a `VoicesManagerFind` object)
* @returns Array of voices matching the filter
* @throws {Error} If called before create()
*/
find(filter: VoicesManagerFind): VoicesManagerVoice[];
}
/**
* Options for controlling the voice prosody (rate, pitch, volume).
*
* Accepted as the optional third argument of the `EdgeTTS` constructor.
* Exported from this module under the public name `ProsodyOptions`; the `$1`
* suffix only distinguishes it from the browser interface of the same base
* name declared later in this file.
*/
interface ProsodyOptions$1 {
/**
* The speaking rate of the voice, as a signed percentage string.
* Examples: "+10.00%", "-20.00%"
*/
rate?: string;
/**
* The speaking volume of the voice, as a signed percentage string.
* Examples: "+15.00%", "-10.00%"
*/
volume?: string;
/**
* The speaking pitch of the voice, as a signed value in Hz.
* Examples: "+20Hz", "-10Hz"
*/
pitch?: string;
}
/**
* Represents a single word boundary with its timing and text.
* The API provides timing in 100-nanosecond units, i.e. 10,000 units per
* millisecond and 10,000,000 units per second.
*
* Exported from this module under the public name `WordBoundary`.
*/
interface WordBoundary$1 {
/**
* The offset from the beginning of the audio stream in 100-nanosecond units.
*/
offset: number;
/**
* The duration of the word in 100-nanosecond units.
*/
duration: number;
/**
* The text of the spoken word.
*/
text: string;
}
/**
* The final result of the synthesis process, as resolved by
* `EdgeTTS.synthesize()`.
*
* Exported from this module under the public name `SynthesisResult`.
*/
interface SynthesisResult$1 {
/**
* The generated audio as a Blob, which can be used in an <audio> element.
*/
audio: Blob;
/**
* An array of word boundaries containing timing and text for creating subtitles.
* Suitable as input to the `createVTT$1` / `createSRT$1` helpers declared in
* this file, which take the same element type.
*/
subtitle: WordBoundary$1[];
}
/**
* Simple Edge TTS class that provides the same API as the standalone implementation
* but uses the robust infrastructure of the modular project.
*
* All five fields are public and writable.
* NOTE(review): `voice`, `rate`, `volume` and `pitch` are declared as required
* strings while the matching constructor inputs are optional, so defaults are
* presumably applied at construction — the default values are not visible
* in this declaration file.
*/
declare class EdgeTTS {
/** The text to be synthesized. */
text: string;
/** The voice to use for synthesis. */
voice: string;
/** Speaking rate setting (see `ProsodyOptions$1.rate` for the string format). */
rate: string;
/** Speaking volume setting (see `ProsodyOptions$1.volume` for the string format). */
volume: string;
/** Speaking pitch setting (see `ProsodyOptions$1.pitch` for the string format). */
pitch: string;
/**
* @param text The text to be synthesized.
* @param voice The voice to use for synthesis. Optional.
* @param options Prosody options (rate, volume, pitch). Optional.
*/
constructor(text: string, voice?: string, options?: ProsodyOptions$1);
/**
* Initiates the synthesis process.
* @returns A promise that resolves with the synthesized audio and subtitle data.
*/
synthesize(): Promise<SynthesisResult$1>;
}
/**
* Creates a subtitle file content in VTT (WebVTT) format.
*
* Exported from this module under the public name `createVTT`.
*
* @param wordBoundaries The array of word boundary data (timings in
* 100-nanosecond units, as described on `WordBoundary$1`).
* @returns A string containing the VTT formatted subtitles.
*/
declare function createVTT$1(wordBoundaries: WordBoundary$1[]): string;
/**
* Creates a subtitle file content in SRT (SubRip) format.
*
* Exported from this module under the public name `createSRT`.
*
* @param wordBoundaries The array of word boundary data (timings in
* 100-nanosecond units, as described on `WordBoundary$1`).
* @returns A string containing the SRT formatted subtitles.
*/
declare function createSRT$1(wordBoundaries: WordBoundary$1[]): string;
/**
* Browser-compatible version of edge-tts Simple API
* Uses native browser APIs instead of Node.js dependencies
*/
/**
* Options for controlling the voice prosody (rate, pitch, volume).
*
* Accepted as the optional third argument of the `EdgeTTSBrowser` constructor.
* Exported from this module under the public name `BrowserProsodyOptions`.
*/
interface ProsodyOptions {
/**
* The speaking rate of the voice, as a signed percentage string.
* Examples: "+10.00%", "-20.00%"
*/
rate?: string;
/**
* The speaking volume of the voice, as a signed percentage string.
* Examples: "+15.00%", "-10.00%"
*/
volume?: string;
/**
* The speaking pitch of the voice, as a signed value in Hz.
* Examples: "+20Hz", "-10Hz"
*/
pitch?: string;
}
/**
* Represents a single word boundary with its timing and text.
* The API provides timing in 100-nanosecond units, i.e. 10,000 units per
* millisecond and 10,000,000 units per second.
*
* Exported from this module under the public name `BrowserWordBoundary`.
*/
interface WordBoundary {
/**
* The offset from the beginning of the audio stream in 100-nanosecond units.
*/
offset: number;
/**
* The duration of the word in 100-nanosecond units.
*/
duration: number;
/**
* The text of the spoken word.
*/
text: string;
}
/**
* The final result of the synthesis process, as resolved by
* `EdgeTTSBrowser.synthesize()`.
*
* Exported from this module under the public name `BrowserSynthesisResult`.
*/
interface SynthesisResult {
/**
* The generated audio as a Blob, which can be used in an <audio> element.
*/
audio: Blob;
/**
* An array of word boundaries containing timing and text for creating subtitles.
*/
subtitle: WordBoundary[];
}
/**
* Browser-compatible Edge TTS class that uses native browser APIs.
*
* Mirrors the public surface of `EdgeTTS` (same five public fields, same
* constructor parameter order, same `synthesize()` method) but with the
* browser-flavoured option and result types.
*
* @remarks This uses an undocumented Microsoft API. CORS policy may prevent
* direct usage from web apps. Consider using a proxy server.
*/
declare class EdgeTTSBrowser {
/** The text to be synthesized. */
text: string;
/** The voice to use for synthesis. */
voice: string;
/** Speaking rate setting (see `ProsodyOptions.rate` for the string format). */
rate: string;
/** Speaking volume setting (see `ProsodyOptions.volume` for the string format). */
volume: string;
/** Speaking pitch setting (see `ProsodyOptions.pitch` for the string format). */
pitch: string;
// Private members below are listed by name only; the declaration output
// omits their types and values.
private ws;
private readonly WSS_URL;
private readonly TRUSTED_CLIENT_TOKEN;
/**
* @param text The text to be synthesized.
* @param voice The voice to use for synthesis. Optional.
* @param options Prosody options (rate, volume, pitch). Optional.
*/
constructor(text: string, voice?: string, options?: ProsodyOptions);
/**
* Initiates the synthesis process.
* @returns A promise that resolves with the synthesized audio and subtitle data.
*/
synthesize(): Promise<SynthesisResult>;
/**
* Establishes a connection to the WebSocket server.
*/
private connect;
/**
* Parses a string message from the WebSocket into headers and a body.
*/
private parseMessage;
/**
* Creates the speech configuration message.
*/
private createSpeechConfig;
/**
* Creates the SSML (Speech Synthesis Markup Language) message.
*/
private createSSML;
// NOTE(review): the next three helpers carry no documentation in the source;
// going by their names they presumably build a connection identifier, a
// timestamp string, and XML-escaped text — confirm against the implementation.
private generateConnectionId;
private getTimestamp;
private escapeXml;
/**
* Browser-compatible version of DRM security token generation
* Uses Web Crypto API instead of Node.js crypto
*/
private generateSecMsGec;
}
/**
* Creates a subtitle file content in VTT (WebVTT) format.
*
* Browser variant; exported from this module under the public name
* `createVTTBrowser`.
*
* @param wordBoundaries The array of word boundary data (timings in
* 100-nanosecond units, as described on `WordBoundary`).
* @returns A string containing the VTT formatted subtitles.
*/
declare function createVTT(wordBoundaries: WordBoundary[]): string;
/**
* Creates a subtitle file content in SRT (SubRip) format.
*
* Browser variant; exported from this module under the public name
* `createSRTBrowser`.
*
* @param wordBoundaries The array of word boundary data (timings in
* 100-nanosecond units, as described on `WordBoundary`).
* @returns A string containing the SRT formatted subtitles.
*/
declare function createSRT(wordBoundaries: WordBoundary[]): string;
// Public API of this module. Declarations carrying a `$1` suffix are exported
// under their plain names (ProsodyOptions, SynthesisResult, WordBoundary,
// createSRT, createVTT), while the un-suffixed browser declarations are
// exported with a `Browser` marker (BrowserProsodyOptions,
// BrowserSynthesisResult, BrowserWordBoundary, createSRTBrowser,
// createVTTBrowser). Everything else is exported under its declared name.
export { type ProsodyOptions as BrowserProsodyOptions, type SynthesisResult as BrowserSynthesisResult, type WordBoundary as BrowserWordBoundary, Communicate, type CommunicateOptions, EdgeTTS, EdgeTTSBrowser, type ProsodyOptions$1 as ProsodyOptions, type SynthesisResult$1 as SynthesisResult, TTSChunk, Voice, VoicesManager, VoicesManagerFind, VoicesManagerVoice, type WordBoundary$1 as WordBoundary, createSRT$1 as createSRT, createSRT as createSRTBrowser, createVTT$1 as createVTT, createVTT as createVTTBrowser, listVoices };