expo-edge-speech
Version:
Text-to-speech library for Expo using Microsoft Edge TTS service
448 lines • 12.6 kB
TypeScript
/**
* Type definitions for expo-edge-speech
* Compatible with expo-speech API
*/
import type { InterruptionModeAndroid, InterruptionModeIOS } from "expo-av";
/**
* Basic speech event callback type (no parameters)
*/
export type SpeechEventCallback = () => void;
/**
* Word boundary event data
* Matches expo-speech onBoundary callback data
*/
export interface WordBoundary {
charIndex: number;
charLength: number;
}
/**
* Speech error interface
* Basic error information for speech operations
*/
export interface SpeechError {
name: string;
message: string;
code?: string | number;
}
/**
* Speech options interface
* Matches expo-speech SpeechOptions interface exactly
*/
export interface SpeechOptions {
language?: string;
voice?: string;
pitch?: number;
rate?: number;
volume?: number;
onStart?: (() => void) | SpeechEventCallback;
onDone?: (() => void) | SpeechEventCallback;
onError?: ((error: Error) => void) | SpeechEventCallback;
onStopped?: (() => void) | SpeechEventCallback;
onBoundary?: ((boundary: WordBoundary) => void) | SpeechEventCallback;
onMark?: SpeechEventCallback | null;
onPause?: SpeechEventCallback | null;
onResume?: SpeechEventCallback | null;
}
/**
* Edge Speech Voice interface for Microsoft Edge TTS
* Enhanced interface representing a voice available from the Edge Speech service
* This is now the standard interface used throughout the project
*/
export interface EdgeSpeechVoice {
/** Unique voice identifier (e.g., "en-US-AriaNeural") */
identifier: string;
/** Human-readable display name */
name: string;
/** Language/locale code (e.g., "en-US") */
language: string;
/** Voice gender ("Male" or "Female") */
gender: "Male" | "Female";
/** Content categories this voice is suitable for */
contentCategories: string[];
/** Voice personality traits */
voicePersonalities: string[];
}
/**
* Boundary event data interface
* Based on actual Edge TTS structure
*/
export interface BoundaryEventData {
Type: "WordBoundary";
Data: {
Offset: number;
Duration: number;
text: {
Text: string;
Length: number;
BoundaryType: "WordBoundary";
};
};
}
/**
* Speech configuration interface
* Edge TTS specific configuration sections
*/
export interface SpeechConfiguration {
/** Audio format configuration (MP3 only for Edge TTS) */
audioFormat: {
format: "audio-24khz-48kbitrate-mono-mp3";
sampleRate: 24000;
bitRate: 48000;
channels: 1;
};
/** Connection pooling settings */
connectionPooling: {
maxConnections: number;
connectionTimeout: number;
reuseConnections: boolean;
};
/** Word boundary settings */
wordBoundary: {
enabled: boolean;
offsetCompensation: number;
};
}
/**
* Edge TTS WebSocket headers
*/
export interface EdgeTTSHeaders {
"X-RequestId": string;
"X-Timestamp": string;
"Content-Type": string;
Path: string;
}
/**
* Edge TTS text message (JSON)
*/
export interface EdgeTTSTextMessage {
headers: EdgeTTSHeaders;
body: string | object;
}
/**
* Edge TTS binary message structure
*/
export interface EdgeTTSBinaryMessage {
headers: EdgeTTSHeaders;
audioData: Uint8Array;
}
/**
* Binary audio message parsing structure
*/
export interface BinaryAudioMessage {
headerLength: number;
header: object;
audioData: Uint8Array;
}
/**
* No audio data received from Edge TTS service
*/
export declare class NoAudioReceived extends Error {
constructor(message?: string);
}
/**
* Unexpected response format from Edge TTS
*/
export declare class UnexpectedResponse extends Error {
constructor(message?: string);
}
/**
* Unknown response path from Edge TTS
*/
export declare class UnknownResponse extends Error {
constructor(message?: string);
}
/**
* WebSocket connection or communication error
*/
export declare class WebSocketError extends Error {
constructor(message?: string);
}
/**
* Clock skew adjustment error for Sec-MS-GEC token
*/
export declare class SkewAdjustmentError extends Error {
constructor(message?: string);
}
/**
* Edge TTS voice from voice list API
*/
export interface EdgeVoice {
Name: string;
ShortName: string;
Gender: string;
Locale: string;
SuggestedCodec: string;
Status: string;
}
/**
* Connection state enumeration
*/
export declare enum ConnectionState {
Disconnected = "disconnected",
Connecting = "connecting",
Connected = "connected",
Synthesizing = "synthesizing",
Error = "error"
}
/**
* Edge TTS connection interface
*/
export interface EdgeTTSConnection {
/** Connection ID: 32-character lowercase string (UUID without dashes) */
id: string;
/** WebSocket connection instance */
websocket: WebSocket | null;
/** Current connection state */
state: ConnectionState;
/** Creation timestamp */
createdAt: Date;
/** Last activity timestamp */
lastActivity: Date;
}
/**
* SSML configuration with Edge TTS requirements
*/
export interface SSMLConfig {
/** SSML namespace (required): http://www.w3.org/2001/10/synthesis */
namespace: "http://www.w3.org/2001/10/synthesis";
/** Voice name format: Microsoft Server Speech Text to Speech Voice (language, voiceName) */
voiceNameFormat: string;
/** Maximum text length for Edge TTS */
maxTextLength: number;
}
/**
* SSML prosody options
*/
export interface SSMLProsody {
rate?: string;
pitch?: string;
volume?: string;
}
/**
* Word boundary timing with offset compensation
*/
export interface WordBoundaryTiming {
/** Raw offset from Edge TTS (in ticks) */
rawOffset: number;
/** Adjusted offset with padding compensation (8,750,000 ticks = 875ms) */
adjustedOffset: number;
/** Offset in milliseconds */
offsetMs: number;
/** Duration in ticks */
duration: number;
/** Duration in milliseconds */
durationMs: number;
}
/**
* Timing conversion utilities
*/
export interface TimingConverter {
/** Convert ticks to milliseconds (10,000 ticks = 1ms) */
ticksToMs: (ticks: number) => number;
/** Convert milliseconds to ticks */
msToTicks: (ms: number) => number;
/** Apply offset compensation for word boundaries */
compensateOffset: (rawOffset: number) => number;
}
/**
* Sec-MS-GEC token generation parameters
*/
export interface SecMSGECToken {
/** Current time in Windows file time format (100-nanosecond intervals since 1601-01-01) */
ticks: number;
/** Clock skew adjustment in ticks (5 minutes = 3,000,000,000 ticks) */
skewAdjustment: number;
/** Hash input format: {ticks}MSEdgeSpeechTTS */
hashInput: string;
/** Generated token (uppercase SHA256 hex) */
token: string;
}
/**
* Authentication headers for Edge TTS
*/
export interface EdgeTTSAuthHeaders {
"User-Agent": string;
Origin: string;
"Sec-MS-GEC": string;
"Sec-MS-GEC-Version": string;
}
/**
* Edge TTS WebSocket message paths
*/
export declare enum MessagePath {
SpeechConfig = "speech.config",
SSML = "ssml",
TurnStart = "turn.start",
AudioMetadata = "audio.metadata",
TurnEnd = "turn.end"
}
/**
* Content types for Edge TTS messages
*/
export declare enum ContentType {
JSON = "application/json; charset=utf-8",
SSML = "application/ssml+xml"
}
/**
* Metadata options for audio synthesis
*/
export interface MetadataOptions {
/** Enable sentence boundary events */
sentenceBoundaryEnabled: boolean;
/** Enable word boundary events */
wordBoundaryEnabled: boolean;
}
/**
* Complete synthesis context configuration
*/
export interface SynthesisContext {
synthesis: {
audio: {
metadataoptions: MetadataOptions;
outputFormat: string;
};
};
}
/**
* SSML synthesis request message
*/
export interface SSMLRequest {
headers: {
"X-RequestId": string;
"X-Timestamp": string;
"Content-Type": "application/ssml+xml";
Path: "ssml";
};
body: string;
}
/**
* Platform-specific audio configuration for expo-av Audio.setAudioModeAsync()
* Supports iOS and Android platforms only
*/
export interface PlatformAudioConfig {
ios: {
/** Whether audio stays active in background - not available in Expo Go for iOS */
staysActiveInBackground?: boolean;
/** Whether audio plays when device is in silent mode - iOS only */
playsInSilentModeIOS?: boolean;
/** Audio interruption mode for iOS - required */
interruptionModeIOS: InterruptionModeIOS;
};
android: {
/** Whether audio stays active in background */
staysActiveInBackground?: boolean;
/** Whether TTS should lower other audio while playing - Android only */
shouldDuckAndroid?: boolean;
/** Whether audio plays through earpiece - Android only */
playThroughEarpieceAndroid?: boolean;
/** Audio interruption mode for Android - required */
interruptionModeAndroid: InterruptionModeAndroid;
};
}
/**
* Audio service configuration interface
*/
export interface SpeechAudioConfig {
/** Platform-specific audio configurations */
platformConfig?: PlatformAudioConfig;
/** Audio loading timeout in milliseconds */
loadingTimeout?: number;
/** Whether to initialize audio session automatically */
autoInitializeAudioSession?: boolean;
}
/**
* Network service configuration interface
*/
export interface SpeechNetworkConfig {
/** Maximum number of retry attempts */
maxRetries?: number;
/** Base retry delay in milliseconds */
baseRetryDelay?: number;
/** Maximum retry delay in milliseconds */
maxRetryDelay?: number;
/** Connection timeout in milliseconds */
connectionTimeout?: number;
/** Graceful close timeout in milliseconds */
gracefulCloseTimeout?: number;
/** Enable debug logging */
enableDebugLogging?: boolean;
}
/**
* Storage service configuration interface
*/
export interface SpeechStorageConfig {
/** Maximum buffer size per connection (16MB) */
maxBufferSize?: number;
/** Cleanup interval in milliseconds */
cleanupInterval?: number;
/** Memory usage warning threshold (80% of limit) */
warningThreshold?: number;
}
/**
* Voice service configuration interface
*/
export interface SpeechVoiceConfig {
/** Cache TTL in milliseconds */
cacheTTL?: number;
/** Enable debug logging */
enableDebugLogging?: boolean;
/** Network timeout for voice list fetching */
networkTimeout?: number;
}
/**
* Circuit breaker configuration
*/
export interface CircuitBreakerConfig {
/** Failure threshold to open circuit */
failureThreshold?: number;
/** Recovery timeout before testing */
recoveryTimeout?: number;
/** Number of test requests in half-open state */
testRequestLimit?: number;
}
/**
* Connection manager configuration interface
*/
export interface SpeechConnectionConfig {
/** Maximum concurrent connections */
maxConnections?: number;
/** Connection timeout in milliseconds */
connectionTimeout?: number;
/** Circuit breaker configuration */
circuitBreaker?: CircuitBreakerConfig;
/** Enable connection pooling for performance */
poolingEnabled?: boolean;
}
/**
* Main Speech API configuration interface
* New in v2.0: Allows configuration of all internal services before initialization
*/
export interface SpeechAPIConfig {
/** Network service configuration */
network?: SpeechNetworkConfig;
/** Audio service configuration */
audio?: SpeechAudioConfig;
/** Storage service configuration */
storage?: SpeechStorageConfig;
/** Connection manager configuration */
connection?: SpeechConnectionConfig;
/** Voice service configuration */
voice?: SpeechVoiceConfig;
/** Optional state configuration */
state?: SpeechStateConfig;
}
/**
* Speech state configuration interface
*/
export interface SpeechStateConfig {
/** Initial speech state */
initialState?: ConnectionState;
/** Enable/disable event logging */
enableLogging?: boolean;
/** Custom event handlers */
eventHandlers?: {
onStateChange?: (newState: ConnectionState, oldState: ConnectionState) => void;
onError?: (error: SpeechError) => void;
};
}
//# sourceMappingURL=types.d.ts.map