UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.

286 lines 11.5 kB
/**
 * Text-to-Speech (TTS) Processing Utility
 *
 * Central orchestrator for all TTS operations across providers.
 * Manages provider-specific TTS handlers and audio generation.
 *
 * @module utils/ttsProcessor
 */
import { logger } from "./logger.js";
import { ErrorCategory, ErrorSeverity } from "../constants/enums.js";
import { NeuroLinkError } from "./errorHandling.js";
import { SpanSerializer, SpanType, SpanStatus, getMetricsAggregator, } from "../observability/index.js";

/**
 * TTS-specific error codes
 */
export const TTS_ERROR_CODES = {
    EMPTY_TEXT: "TTS_EMPTY_TEXT",
    TEXT_TOO_LONG: "TTS_TEXT_TOO_LONG",
    PROVIDER_NOT_SUPPORTED: "TTS_PROVIDER_NOT_SUPPORTED",
    PROVIDER_NOT_CONFIGURED: "TTS_PROVIDER_NOT_CONFIGURED",
    SYNTHESIS_FAILED: "TTS_SYNTHESIS_FAILED",
    INVALID_INPUT: "TTS_INVALID_INPUT",
};

/**
 * TTS Error class for text-to-speech specific errors
 *
 * Forwards its options to NeuroLinkError, defaulting `category` to
 * ErrorCategory.VALIDATION, `severity` to ErrorSeverity.MEDIUM and
 * `retriable` to false when they are not supplied.
 */
export class TTSError extends NeuroLinkError {
    /**
     * @param options - Error description: `code`, `message`, and optionally
     *   `category`, `severity`, `retriable`, `context`, `originalError`
     */
    constructor(options) {
        super({
            code: options.code,
            message: options.message,
            category: options.category ?? ErrorCategory.VALIDATION,
            severity: options.severity ?? ErrorSeverity.MEDIUM,
            retriable: options.retriable ?? false,
            context: options.context,
            originalError: options.originalError,
        });
        this.name = "TTSError";
    }
}

/**
 * TTS processor class for orchestrating text-to-speech operations
 *
 * Follows the same pattern as CSVProcessor, ImageProcessor, and PDFProcessor.
 * Provides a unified interface for TTS generation across multiple providers.
 *
 * @example
 * ```typescript
 * // Register a handler
 * TTSProcessor.registerHandler('google-ai', googleAIHandler);
 *
 * // Check if provider is supported
 * if (TTSProcessor.supports('google-ai')) {
 *   // Provider is registered
 * }
 * ```
 */
export class TTSProcessor {
    /**
     * Handler registry mapping provider names to TTS handlers
     * Uses Map for O(1) lookups and better type safety
     *
     * Keys are stored lower-cased (see registerHandler).
     *
     * @private
     */
    static handlers = new Map();

    /**
     * Default maximum text length for TTS synthesis, in characters
     *
     * The limit is compared against `String.prototype.length` of the trimmed
     * text, i.e. it counts UTF-16 code units, not encoded bytes.
     *
     * Providers can override this value by specifying the `maxTextLength` property
     * in their respective `TTSHandler` implementation. If not specified, this default
     * value will be used.
     *
     * @private
     */
    static DEFAULT_MAX_TEXT_LENGTH = 3000;

    /**
     * Register a TTS handler for a specific provider
     *
     * Allows providers to register their TTS implementation at runtime.
     * The provider name is lower-cased before being stored; registering a
     * name that already exists logs a warning and replaces the old handler.
     *
     * @param providerName - Provider identifier (e.g., 'google-ai', 'openai')
     * @param handler - TTS handler implementation
     * @throws Error if `providerName` or `handler` is missing
     *
     * @example
     * ```typescript
     * const googleHandler: TTSHandler = {
     *   synthesize: async (text, options) => { ... },
     *   getVoices: async (languageCode) => { ... },
     *   isConfigured: () => true
     * };
     *
     * TTSProcessor.registerHandler('google-ai', googleHandler);
     * ```
     */
    static registerHandler(providerName, handler) {
        if (!providerName) {
            throw new Error("Provider name is required");
        }
        if (!handler) {
            throw new Error("Handler is required");
        }
        const normalizedName = providerName.toLowerCase();
        if (this.handlers.has(normalizedName)) {
            logger.warn(`[TTSProcessor] Overwriting existing handler for provider: ${normalizedName}`);
        }
        this.handlers.set(normalizedName, handler);
        logger.debug(`[TTSProcessor] Registered TTS handler for provider: ${normalizedName}`);
    }

    /**
     * Get a registered TTS handler by provider name
     *
     * @private
     * @param providerName - Provider identifier
     * @returns Handler instance or undefined if not registered (a non-string
     *   name can never be registered, so it also yields undefined)
     */
    static getHandler(providerName) {
        // Guard against null/undefined/non-string names so the lookup reports
        // "not registered" instead of throwing a TypeError on toLowerCase().
        if (typeof providerName !== "string") {
            return undefined;
        }
        return this.handlers.get(providerName.toLowerCase());
    }

    /**
     * Check if a provider is supported (has a registered TTS handler)
     *
     * @param providerName - Provider identifier
     * @returns True if handler is registered
     *
     * @example
     * ```typescript
     * if (TTSProcessor.supports('google-ai')) {
     *   console.log('Google AI TTS is supported');
     * }
     * ```
     */
    static supports(providerName) {
        if (!providerName) {
            logger.error("[TTSProcessor] Provider name is required for supports check");
            return false;
        }
        const normalizedName = providerName.toLowerCase();
        const isSupported = this.handlers.has(normalizedName);
        if (!isSupported) {
            logger.debug(`[TTSProcessor] Provider ${providerName} is not supported`);
        }
        return isSupported;
    }

    /**
     * Synthesize speech from text using a registered TTS provider
     *
     * Orchestrates the text-to-speech generation process:
     * 1. Validates input text (is a string, not empty)
     * 2. Looks up the provider handler
     * 3. Validates text length against the provider limit
     * 4. Verifies provider configuration
     * 5. Delegates synthesis to the provider (timeout handled by provider)
     * 6. Enriches result with metadata
     *
     * Every call records one span through the metrics aggregator, with status
     * OK on success and ERROR (plus the error message) on any failure.
     *
     * **Timeout Handling:**
     * Timeouts are enforced by individual provider implementations (see TTSHandler interface).
     * Providers typically use 30-second timeouts via `withTimeout()` utility or
     * provider-specific timeout mechanisms (e.g., Google Cloud client timeout).
     *
     * @param text - Text to convert to speech
     * @param provider - Provider identifier
     * @param options - TTS configuration options (treated as `{}` when omitted)
     * @returns Audio result with buffer and metadata
     * @throws TTSError if validation fails, the provider is not
     *   supported/configured, or the provider's synthesis call fails
     *
     * @example
     * ```typescript
     * const result = await TTSProcessor.synthesize("Hello, world!", "google-ai", {
     *   voice: "en-US-Neural2-C",
     *   format: "mp3",
     *   speed: 1.0
     * });
     *
     * console.log(`Generated ${result.size} bytes of ${result.format} audio`);
     * // Save to file or play the audio buffer
     * ```
     */
    static async synthesize(text, provider, options) {
        // Tolerate omitted options: the span attributes below are read before the
        // try block, so a missing object would otherwise escape as a raw TypeError.
        // When options is provided, this is the very same object reference.
        const resolvedOptions = options ?? {};
        // Create span early so preflight failures are captured
        const span = SpanSerializer.createSpan(SpanType.TTS, "tts.synthesize", {
            "tts.operation": "synthesize",
            "tts.provider": provider,
            "tts.voice": resolvedOptions.voice,
            "tts.format": resolvedOptions.format,
        });
        try {
            // 1a. Text validation: reject non-string input up front. Without this,
            // text.trim() throws a TypeError that the generic handler below would
            // mislabel as a retriable synthesis failure.
            if (typeof text !== "string") {
                logger.error("[TTSProcessor] Text must be a string for synthesis");
                throw new TTSError({
                    code: TTS_ERROR_CODES.INVALID_INPUT,
                    message: `Text for TTS synthesis must be a string, received ${text === null ? "null" : typeof text}`,
                    severity: ErrorSeverity.LOW,
                    retriable: false,
                    context: { provider },
                });
            }
            // Trim the text once at the start
            const trimmedText = text.trim();
            // 1b. Text validation: reject empty text
            if (!trimmedText) {
                logger.error("[TTSProcessor] Text is required for synthesis");
                throw new TTSError({
                    code: TTS_ERROR_CODES.EMPTY_TEXT,
                    message: "Text is required for TTS synthesis",
                    severity: ErrorSeverity.LOW,
                    retriable: false,
                    context: { provider },
                });
            }
            // 2. Handler lookup and error if provider not supported
            const handler = this.getHandler(provider);
            if (!handler) {
                logger.error(`[TTSProcessor] Provider "${provider}" is not registered`);
                throw new TTSError({
                    code: TTS_ERROR_CODES.PROVIDER_NOT_SUPPORTED,
                    message: `TTS provider "${provider}" is not supported. Use TTSProcessor.registerHandler() to register it.`,
                    severity: ErrorSeverity.HIGH,
                    retriable: false,
                    context: {
                        provider,
                        availableProviders: Array.from(this.handlers.keys()),
                    },
                });
            }
            // 3. Text validation: reject text exceeding provider-specific max length
            const maxTextLength = handler.maxTextLength ?? this.DEFAULT_MAX_TEXT_LENGTH;
            if (trimmedText.length > maxTextLength) {
                logger.error(`[TTSProcessor] Text exceeds maximum length of ${maxTextLength} characters for provider "${provider}"`);
                throw new TTSError({
                    code: TTS_ERROR_CODES.TEXT_TOO_LONG,
                    message: `Text length (${trimmedText.length}) exceeds maximum allowed length (${maxTextLength} characters) for provider "${provider}"`,
                    severity: ErrorSeverity.MEDIUM,
                    retriable: false,
                    context: {
                        provider,
                        textLength: trimmedText.length,
                        maxLength: maxTextLength,
                    },
                });
            }
            // 4. Configuration check
            if (!handler.isConfigured()) {
                logger.warn(`[TTSProcessor] Provider "${provider}" is not properly configured`);
                throw new TTSError({
                    code: TTS_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
                    message: `TTS provider "${provider}" is not configured. Please set the required API keys.`,
                    category: ErrorCategory.CONFIGURATION,
                    severity: ErrorSeverity.HIGH,
                    retriable: false,
                    context: { provider },
                });
            }
            logger.debug(`[TTSProcessor] Starting synthesis with provider: ${provider}`);
            // 5. Call handler.synthesize() - providers handle their own timeouts
            const result = await handler.synthesize(trimmedText, resolvedOptions);
            // 6. Post-processing: add metadata
            const enrichedResult = {
                ...result,
                voice: result.voice ?? resolvedOptions.voice,
            };
            logger.info(`[TTSProcessor] Successfully synthesized ${result.size} bytes of audio`);
            // 7. Record successful span
            const endedSpan = SpanSerializer.endSpan(span, SpanStatus.OK);
            getMetricsAggregator().recordSpan(endedSpan);
            // 8. Returns TTSResult with buffer, format, metadata
            return enrichedResult;
        }
        catch (err) {
            // Record error span
            const endedSpan = SpanSerializer.endSpan(span, SpanStatus.ERROR, err instanceof Error ? err.message : String(err));
            getMetricsAggregator().recordSpan(endedSpan);
            // 9. Comprehensive error handling
            // Re-throw TTSError as-is
            if (err instanceof TTSError) {
                throw err;
            }
            // Wrap other errors in TTSError
            const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
            logger.error(`[TTSProcessor] Synthesis failed for provider "${provider}": ${errorMessage}`);
            throw new TTSError({
                code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
                message: `TTS synthesis failed for provider "${provider}": ${errorMessage}`,
                category: ErrorCategory.EXECUTION,
                severity: ErrorSeverity.HIGH,
                retriable: true,
                context: {
                    provider,
                    // Never let the error handler itself throw while building context.
                    textLength: typeof text === "string" ? text.trim().length : undefined,
                    options: resolvedOptions,
                },
                originalError: err instanceof Error ? err : undefined,
            });
        }
    }
}
//# sourceMappingURL=ttsProcessor.js.map