UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

133 lines (132 loc) 4.32 kB
import { AbstractTTSClient } from "../core/abstract-tts";
import type { SpeakOptions, TTSCredentials, UnifiedVoice } from "../types";
/**
 * Extended options for Polly TTS.
 *
 * Adds Polly-specific fields on top of the shared {@link SpeakOptions}.
 */
export interface PollyTTSOptions extends SpeakOptions {
    /**
     * Requested audio format: one of 'mp3', 'wav' or 'ogg'.
     * The default used when omitted is not visible in this declaration.
     */
    format?: 'mp3' | 'wav' | 'ogg';
    /**
     * Optional file path.
     * NOTE(review): presumably the destination for synthesized audio - confirm
     * against the implementation; this declaration does not show how it is used.
     */
    filePath?: string;
}
/**
 * AWS Polly TTS credentials.
 *
 * All three fields are required by the type, in addition to whatever
 * {@link TTSCredentials} declares.
 */
export interface PollyTTSCredentials extends TTSCredentials {
    /**
     * AWS region
     */
    region: string;
    /**
     * AWS access key ID
     */
    accessKeyId: string;
    /**
     * AWS secret access key
     */
    secretAccessKey: string;
}
/**
 * AWS Polly TTS client.
 *
 * Declaration of the Polly engine built on {@link AbstractTTSClient}.
 * Private members appear here without types because this is an emitted
 * declaration; their shapes live in the implementation file.
 */
export declare class PollyTTSClient extends AbstractTTSClient {
    /**
     * AWS Polly client
     */
    private client;
    /**
     * NOTE(review): undocumented in the original; presumably holds the loaded
     * AWS Polly SDK module - confirm against the implementation.
     */
    private _pollyModule;
    /**
     * Cache of voice metadata for engine detection
     */
    private voiceCache;
    /**
     * Create a new AWS Polly TTS client
     * @param credentials AWS credentials (region, access key ID, secret access key)
     */
    constructor(credentials: PollyTTSCredentials);
    /**
     * Get available voices from the provider
     * @returns Promise resolving to an array of raw (provider-shaped) voice objects
     */
    protected _getVoices(): Promise<any[]>;
    /**
     * Get available voices from the provider with caching
     * @returns Promise resolving to an array of unified voice objects
     */
    getVoices(): Promise<UnifiedVoice[]>;
    /**
     * Map AWS Polly voice objects to unified format
     * @param rawVoices Array of AWS Polly voice objects
     * @returns Promise resolving to an array of unified voice objects
     */
    protected _mapVoicesToUnified(rawVoices: any[]): Promise<UnifiedVoice[]>;
    /**
     * Get the appropriate engine for a voice based on supported engines
     * @param voiceId Voice ID to check
     * @returns The best engine to use for this voice
     */
    private getEngineForVoice;
    /**
     * Get SSML support level for a voice based on its engine type
     * @param voiceId Voice ID to check
     * @returns SSML support level: "full", "limited", or "none"
     */
    private getSSMLSupportLevel;
    /**
     * Strip unsupported SSML tags for limited SSML engines
     * Based on AWS Polly documentation for neural and generative voices
     * @param ssml SSML text to process
     * @returns SSML with unsupported tags removed
     */
    private stripUnsupportedSSMLTags;
    /**
     * Prepare SSML for AWS Polly based on voice engine capabilities
     * @param text Text or SSML to prepare
     * @param options Synthesis options
     * @returns Promise resolving to prepared SSML or plain text
     */
    private prepareSSML;
    /**
     * Convert text to audio bytes
     * @param text Text or SSML to synthesize
     * @param options Synthesis options (optional)
     * @returns Promise resolving to audio bytes
     */
    synthToBytes(text: string, options?: PollyTTSOptions): Promise<Uint8Array>;
    /**
     * Synthesize text to a byte stream with word boundaries
     * @param text Text or SSML to synthesize
     * @param options Synthesis options (optional)
     * @returns Promise resolving to an object containing the audio stream and
     * word boundaries. Each boundary carries the word `text` plus numeric
     * `offset` and `duration`; their units are not stated in this declaration.
     */
    synthToBytestream(text: string, options?: PollyTTSOptions): Promise<{
        audioStream: ReadableStream<Uint8Array>;
        wordBoundaries: Array<{
            text: string;
            offset: number;
            duration: number;
        }>;
    }>;
    /**
     * Strip SSML tags from text
     * @param text Text with SSML tags
     * @returns Plain text without SSML tags
     */
    protected _stripSSML(text: string): string;
    /**
     * Add a WAV header to PCM audio data
     * This matches the Python implementation using wave.setparams((1, 2, 16000, 0, "NONE", "NONE"))
     * @param pcmData PCM audio data from AWS Polly (signed 16-bit, 1 channel, little-endian)
     * @param sampleRate Sample rate in Hz (default: 16000)
     * @returns PCM audio data with WAV header
     */
    private addWavHeader;
    /**
     * Get the list of required credential types for this engine
     * @returns Array of required credential field names
     */
    protected getRequiredCredentials(): string[];
    /**
     * Check if credentials are valid
     * @returns Promise resolving to true if credentials are valid
     */
    checkCredentials(): Promise<boolean>;
}