edge-tts-universal

Version:

Universal text-to-speech library using Microsoft Edge's online TTS service. Works in Node.js and browsers WITHOUT needing Microsoft Edge, Windows, or an API key.

github.com/travisvn/edge-tts-universal

travisvn/edge-tts-universal

316 lines (310 loc) • 10.1 kB

TypeScript

import {
  T as TTSChunk,
  V as Voice,
  a as VoicesManagerFind,
  b as VoicesManagerVoice,
} from './exceptions-C4rAyGjr.js';
export {
  C as CommunicateState,
  E as EdgeTTSException,
  F as FetchError,
  I as IsomorphicCommunicate,
  c as IsomorphicCommunicateOptions,
  e as IsomorphicDRM,
  d as IsomorphicVoicesManager,
  N as NoAudioReceived,
  f as SkewAdjustmentError,
  S as SubMaker,
  g as UnexpectedResponse,
  U as UnknownResponse,
  h as ValueError,
  i as VoiceTag,
  W as WebSocketError,
  l as listVoicesIsomorphic,
} from './exceptions-C4rAyGjr.js';

/**
 * Settings accepted by the {@link Communicate} constructor.
 * Every field is optional.
 */
interface CommunicateOptions {
  /** Name of the synthesis voice, e.g. "en-US-EmmaMultilingualNeural". */
  voice?: string;
  /** Relative change of the speech rate, e.g. "+20%" or "-10%". */
  rate?: string;
  /** Relative change of the volume level, e.g. "+50%" or "-25%". */
  volume?: string;
  /** Pitch change expressed in Hz, e.g. "+5Hz" or "-10Hz". */
  pitch?: string;
  /** URL of the proxy that requests should go through. */
  proxy?: string;
  /** Timeout for the WebSocket connection, in milliseconds. */
  connectionTimeout?: number;
}

/**
 * Entry point for synthesizing speech from text through Microsoft Edge's
 * online TTS service.
 *
 * @example
 * ```typescript
 * const communicate = new Communicate('Hello, world!', {
 *   voice: 'en-US-EmmaMultilingualNeural',
 * });
 *
 * for await (const chunk of communicate.stream()) {
 *   if (chunk.type === 'audio' && chunk.data) {
 *     // Handle audio data
 *   }
 * }
 * ```
 */
declare class Communicate {
  private readonly ttsConfig;
  private readonly texts;
  private readonly proxy?;
  private readonly connectionTimeout?;
  private state;

  /**
   * Builds a Communicate instance for one text-to-speech request.
   *
   * @param text - Text that should be synthesized
   * @param options - Optional synthesis configuration
   */
  constructor(text: string, options?: CommunicateOptions);

  private parseMetadata;
  private _stream;

  /**
   * Produces the synthesis results as a stream.
   *
   * The returned async generator yields audio chunks as well as word
   * boundary events. A given Communicate instance supports only a single
   * call to this method.
   *
   * @yields TTSChunk - Either audio data or word boundary information
   * @throws {Error} When invoked more than once
   * @throws {NoAudioReceived} When no audio data is received
   * @throws {WebSocketError} When the WebSocket connection fails
   *
   * @example
   * ```typescript
   * for await (const chunk of communicate.stream()) {
   *   if (chunk.type === 'audio') {
   *     // Process audio data
   *   } else if (chunk.type === 'WordBoundary') {
   *     // Process subtitle timing
   *   }
   * }
   * ```
   */
  stream(): AsyncGenerator<TTSChunk, void, unknown>;
}

/**
 * Retrieves every voice offered by the Microsoft Edge TTS service.
 *
 * @param proxy - Proxy URL to use for the request (optional)
 * @returns Promise that resolves to the array of available voices
 */
declare function listVoices(proxy?: string): Promise<Voice[]>;

/**
 * Helper for looking up and filtering the available voices.
 *
 * @example
 * ```typescript
 * const voicesManager = await VoicesManager.create();
 * const englishVoices = voicesManager.find({ Language: 'en' });
 * ```
 */
declare class VoicesManager {
  private voices;
  private calledCreate;

  /**
   * Asynchronous factory for VoicesManager instances.
   *
   * @param customVoices - Voice list to use instead of fetching from the API (optional)
   * @param proxy - Proxy URL for API requests (optional)
   * @returns Promise that resolves to the VoicesManager instance
   */
  static create(customVoices?: Voice[], proxy?: string): Promise<VoicesManager>;

  /**
   * Returns the voices that satisfy the given criteria.
   *
   * @param filter - Criteria used to select voices
   * @returns Array holding the voices that match the filter
   * @throws {Error} When called before create()
   */
  find(filter: VoicesManagerFind): VoicesManagerVoice[];
}

/**
 * Prosody controls for the voice: rate, pitch and volume.
 */
interface ProsodyOptions$1 {
  /**
   * Speaking rate of the voice.
   * Examples: "+10.00%", "-20.00%"
   */
  rate?: string;
  /**
   * Speaking volume of the voice.
   * Examples: "+15.00%", "-10.00%"
   */
  volume?: string;
  /**
   * Speaking pitch of the voice.
   * Examples: "+20Hz", "-10Hz"
   */
  pitch?: string;
}

/**
 * One word boundary: the spoken text plus its timing.
 * Timing values come from the API in 100-nanosecond units.
 */
interface WordBoundary$1 {
  /**
   * Offset from the start of the audio stream, in 100-nanosecond units.
   */
  offset: number;
  /**
   * Duration of the word, in 100-nanosecond units.
   */
  duration: number;
  /**
   * Text of the spoken word.
   */
  text: string;
}

/**
 * Final outcome of a synthesis run.
 */
interface SynthesisResult$1 {
  /**
   * Generated audio as a Blob; usable in an `<audio>` element.
   */
  audio: Blob;
  /**
   * Word boundaries (timing and text) from which subtitles can be created.
   */
  subtitle: WordBoundary$1[];
}

/**
 * Simple Edge TTS class. It offers the same API as the standalone
 * implementation while relying on the robust infrastructure of the
 * modular project.
 */
declare class EdgeTTS {
  text: string;
  voice: string;
  rate: string;
  volume: string;
  pitch: string;

  /**
   * @param text Text to synthesize.
   * @param voice Voice used for the synthesis.
   * @param options Prosody options (rate, volume, pitch).
   */
  constructor(text: string, voice?: string, options?: ProsodyOptions$1);

  /**
   * Starts the synthesis process.
   * @returns Promise resolving with the synthesized audio and the subtitle data.
   */
  synthesize(): Promise<SynthesisResult$1>;
}

/**
 * Builds subtitle file content in VTT (WebVTT) format.
 * @param wordBoundaries Array of word boundary data.
 * @returns String holding the VTT formatted subtitles.
 */
declare function createVTT$1(wordBoundaries: WordBoundary$1[]): string;

/**
 * Builds subtitle file content in SRT (SubRip) format.
 * @param wordBoundaries Array of word boundary data.
 * @returns String holding the SRT formatted subtitles.
 */
declare function createSRT$1(wordBoundaries: WordBoundary$1[]): string;

/**
 * Browser-compatible version of the edge-tts Simple API.
 * Relies on native browser APIs instead of Node.js dependencies.
 */

/**
 * Prosody controls for the voice: rate, pitch and volume.
 */
interface ProsodyOptions {
  /**
   * Speaking rate of the voice.
   * Examples: "+10.00%", "-20.00%"
   */
  rate?: string;
  /**
   * Speaking volume of the voice.
   * Examples: "+15.00%", "-10.00%"
   */
  volume?: string;
  /**
   * Speaking pitch of the voice.
   * Examples: "+20Hz", "-10Hz"
   */
  pitch?: string;
}

/**
 * One word boundary: the spoken text plus its timing.
 * Timing values come from the API in 100-nanosecond units.
 */
interface WordBoundary {
  /**
   * Offset from the start of the audio stream, in 100-nanosecond units.
   */
  offset: number;
  /**
   * Duration of the word, in 100-nanosecond units.
   */
  duration: number;
  /**
   * Text of the spoken word.
   */
  text: string;
}

/**
 * Final outcome of a synthesis run.
 */
interface SynthesisResult {
  /**
   * Generated audio as a Blob; usable in an `<audio>` element.
   */
  audio: Blob;
  /**
   * Word boundaries (timing and text) from which subtitles can be created.
   */
  subtitle: WordBoundary[];
}

/**
 * Edge TTS class for browsers, built on native browser APIs.
 *
 * @remarks The Microsoft API used here is undocumented. CORS policy may
 * block direct usage from web apps, so consider going through a proxy server.
 */
declare class EdgeTTSBrowser {
  text: string;
  voice: string;
  rate: string;
  volume: string;
  pitch: string;
  private ws;
  private readonly WSS_URL;
  private readonly TRUSTED_CLIENT_TOKEN;

  /**
   * @param text Text to synthesize.
   * @param voice Voice used for the synthesis.
   * @param options Prosody options (rate, volume, pitch).
   */
  constructor(text: string, voice?: string, options?: ProsodyOptions);

  /**
   * Starts the synthesis process.
   * @returns Promise resolving with the synthesized audio and the subtitle data.
   */
  synthesize(): Promise<SynthesisResult>;

  /**
   * Opens the connection to the WebSocket server.
   */
  private connect;

  /**
   * Splits a string message received over the WebSocket into headers and a body.
   */
  private parseMessage;

  /**
   * Builds the speech configuration message.
   */
  private createSpeechConfig;

  /**
   * Builds the SSML (Speech Synthesis Markup Language) message.
   */
  private createSSML;

  private generateConnectionId;
  private getTimestamp;
  private escapeXml;

  /**
   * Browser-compatible DRM security token generation.
   * Uses the Web Crypto API instead of Node.js crypto.
   */
  private generateSecMsGec;
}

/**
 * Builds subtitle file content in VTT (WebVTT) format.
 */
declare function createVTT(wordBoundaries: WordBoundary[]): string;

/**
 * Builds subtitle file content in SRT (SubRip) format.
 */
declare function createSRT(wordBoundaries: WordBoundary[]): string;

export {
  type ProsodyOptions as BrowserProsodyOptions,
  type SynthesisResult as BrowserSynthesisResult,
  type WordBoundary as BrowserWordBoundary,
  Communicate,
  type CommunicateOptions,
  EdgeTTS,
  EdgeTTSBrowser,
  type ProsodyOptions$1 as ProsodyOptions,
  type SynthesisResult$1 as SynthesisResult,
  TTSChunk,
  Voice,
  VoicesManager,
  VoicesManagerFind,
  VoicesManagerVoice,
  type WordBoundary$1 as WordBoundary,
  createSRT$1 as createSRT,
  createSRT as createSRTBrowser,
  createVTT$1 as createVTT,
  createVTT as createVTTBrowser,
  listVoices,
};