/*
 * js-tts-wrapper
 * Version: (not specified)
 * A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services.
 * 133 lines (132 loc) • 4.32 kB
 * TypeScript
 */
import { AbstractTTSClient } from "../core/abstract-tts";
import type { SpeakOptions, TTSCredentials, UnifiedVoice } from "../types";
/**
 * Speak/synthesis options accepted by the AWS Polly client.
 *
 * Adds Polly-specific optional fields on top of the shared `SpeakOptions`.
 */
export interface PollyTTSOptions extends SpeakOptions {
  /**
   * NOTE(review): presumably a file-system path associated with the
   * synthesized audio; its exact use is not visible in this declaration —
   * confirm against the implementation.
   */
  filePath?: string;

  /**
   * Audio format selector. Only the three listed values are accepted; the
   * value used when this is omitted is decided by the implementation.
   */
  format?: 'mp3' | 'wav' | 'ogg';
}
/**
 * Credentials needed to talk to AWS Polly.
 *
 * Extends the shared `TTSCredentials` shape; all three fields are required.
 */
export interface PollyTTSCredentials extends TTSCredentials {
  /** AWS region. */
  region: string;

  /** AWS access key ID. */
  accessKeyId: string;

  /** AWS secret access key. */
  secretAccessKey: string;
}
/**
 * Text-to-Speech client for AWS Polly.
 *
 * This is a type declaration only; the behavior described below is the
 * documented contract of each member, not something visible in this file.
 */
export declare class PollyTTSClient extends AbstractTTSClient {
  /** The underlying AWS Polly client. */
  private client;

  /**
   * NOTE(review): presumably a handle to the loaded AWS Polly SDK module —
   * confirm against the implementation.
   */
  private _pollyModule;

  /** Voice metadata cache, used when detecting which engine a voice needs. */
  private voiceCache;

  /**
   * Construct an AWS Polly TTS client.
   * @param credentials AWS credentials (region, access key ID, secret access key)
   */
  constructor(credentials: PollyTTSCredentials);

  /**
   * Fetch the provider's available voices in their raw form.
   * @returns Promise resolving to an array of voice objects
   */
  protected _getVoices(): Promise<any[]>;

  /**
   * Fetch the provider's available voices, with caching.
   * @returns Promise resolving to an array of unified voice objects
   */
  getVoices(): Promise<UnifiedVoice[]>;

  /**
   * Convert AWS Polly voice objects into the unified voice format.
   * @param rawVoices Array of AWS Polly voice objects
   * @returns Promise resolving to an array of unified voice objects
   */
  protected _mapVoicesToUnified(rawVoices: any[]): Promise<UnifiedVoice[]>;

  /**
   * Choose the engine for a voice from the engines that voice supports.
   * @param voiceId Voice ID to check
   * @returns The best engine to use for this voice
   */
  private getEngineForVoice;

  /**
   * Report how much SSML a voice supports, based on its engine type.
   * @param voiceId Voice ID to check
   * @returns SSML support level: "full", "limited", or "none"
   */
  private getSSMLSupportLevel;

  /**
   * Remove SSML tags that limited-SSML engines do not support.
   * Follows the AWS Polly documentation for neural and generative voices.
   * @param ssml SSML text to process
   * @returns SSML with unsupported tags removed
   */
  private stripUnsupportedSSMLTags;

  /**
   * Prepare input for AWS Polly according to the voice engine's capabilities.
   * @param text Text or SSML to prepare
   * @param options Synthesis options
   * @returns Promise resolving to prepared SSML or plain text
   */
  private prepareSSML;

  /**
   * Synthesize text into audio bytes.
   * @param text Text or SSML to synthesize
   * @param options Synthesis options
   * @returns Promise resolving to audio bytes
   */
  synthToBytes(text: string, options?: PollyTTSOptions): Promise<Uint8Array>;

  /**
   * Synthesize text into a byte stream accompanied by word boundaries.
   * @param text Text or SSML to synthesize
   * @param options Synthesis options
   * @returns Promise resolving to an object containing the audio stream and word boundaries
   */
  synthToBytestream(
    text: string,
    options?: PollyTTSOptions
  ): Promise<{
    audioStream: ReadableStream<Uint8Array>;
    wordBoundaries: {
      text: string;
      offset: number;
      duration: number;
    }[];
  }>;

  /**
   * Remove SSML tags from a string.
   * @param text Text with SSML tags
   * @returns Plain text without SSML tags
   */
  protected _stripSSML(text: string): string;

  /**
   * Prepend a WAV header to PCM audio data.
   * Mirrors the Python implementation, which uses
   * wave.setparams((1, 2, 16000, 0, "NONE", "NONE")).
   * @param pcmData PCM audio data from AWS Polly (signed 16-bit, 1 channel, little-endian)
   * @param sampleRate Sample rate in Hz (default: 16000)
   * @returns PCM audio data with WAV header
   */
  private addWavHeader;

  /**
   * List the credential fields this engine requires.
   * @returns Array of required credential field names
   */
  protected getRequiredCredentials(): string[];

  /**
   * Verify that the configured credentials are valid.
   * @returns Promise resolving to true if credentials are valid
   */
  checkCredentials(): Promise<boolean>;
}