UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

311 lines 11.8 kB
/**
 * ElevenLabs Text-to-Speech Handler
 *
 * Implementation of TTS using ElevenLabs API.
 *
 * @module voice/providers/ElevenLabsTTS
 */
import { ErrorCategory, ErrorSeverity } from "../../constants/enums.js";
import { logger } from "../../utils/logger.js";
import { TTS_ERROR_CODES, TTSError } from "../../utils/ttsProcessor.js";
/**
 * ElevenLabs Text-to-Speech Handler
 *
 * Supports high-quality multilingual TTS with voice cloning.
 *
 * Every failure surfaced by the public methods is a `TTSError`; errors that
 * are already `TTSError` instances are rethrown unchanged rather than wrapped
 * a second time.
 *
 * @see https://elevenlabs.io/docs/api-reference
 */
export class ElevenLabsTTS {
    /** Trimmed API key, or `null` when no non-empty key could be resolved. */
    apiKey;
    /** Base URL that every request path is appended to. */
    baseUrl = "https://api.elevenlabs.io/v1";
    /**
     * Last unfiltered voice list as `{ voices, timestamp }`, or `null` until
     * the first successful unfiltered `getVoices()` call.
     */
    voicesCache = null;
    static CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
    /**
     * Maximum text length (5000 characters).
     *
     * NOTE(review): no method of this class enforces this limit; presumably
     * callers read it before invoking `synthesize()` — confirm.
     */
    maxTextLength = 5000;
    /**
     * @param {string} [apiKey] - Explicit API key. When nullish, the
     *   `ELEVENLABS_API_KEY` environment variable is used instead. A key that
     *   is empty after trimming leaves the handler unconfigured.
     */
    constructor(apiKey) {
        const resolvedKey = (apiKey ?? process.env.ELEVENLABS_API_KEY ?? "").trim();
        this.apiKey = resolvedKey.length > 0 ? resolvedKey : null;
    }
    /**
     * @returns {boolean} `true` when a non-empty API key was resolved.
     */
    isConfigured() {
        return this.apiKey !== null;
    }
    /**
     * Fetch the available voices, optionally filtered by language.
     *
     * Unfiltered results are cached for `CACHE_TTL_MS`; filtered requests
     * always hit the API and never populate the cache.
     *
     * @param {string} [languageCode] - Language tag such as `"en"` or
     *   `"en-US"`. Matching is case-insensitive and also accepts the base
     *   language of the tag.
     * @returns {Promise<Array<object>>} Voice descriptors with `id`, `name`,
     *   `languageCode`, `languageCodes`, `gender`, `type` and `description`.
     * @throws {TTSError} `PROVIDER_NOT_CONFIGURED` when no API key is set;
     *   `SYNTHESIS_FAILED` on timeout, non-2xx status, or any other failure.
     */
    async getVoices(languageCode) {
        if (!this.apiKey) {
            throw new TTSError({
                code: TTS_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
                message: "ElevenLabs API key not configured",
                category: ErrorCategory.CONFIGURATION,
                severity: ErrorSeverity.HIGH,
                retriable: false,
            });
        }
        // Return cached voices if valid
        if (this.voicesCache &&
            Date.now() - this.voicesCache.timestamp < ElevenLabsTTS.CACHE_TTL_MS &&
            !languageCode) {
            return this.voicesCache.voices;
        }
        try {
            const voicesController = new AbortController();
            const voicesTimeoutId = setTimeout(() => voicesController.abort(), 30000);
            let response;
            try {
                response = await fetch(`${this.baseUrl}/voices`, {
                    method: "GET",
                    headers: {
                        "xi-api-key": this.apiKey,
                    },
                    signal: voicesController.signal,
                });
            }
            catch (fetchErr) {
                if (fetchErr instanceof Error && fetchErr.name === "AbortError") {
                    throw new TTSError({
                        code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
                        message: "ElevenLabs voices request timed out after 30 seconds",
                        category: ErrorCategory.NETWORK,
                        severity: ErrorSeverity.MEDIUM,
                        retriable: true,
                        originalError: fetchErr,
                    });
                }
                throw fetchErr;
            }
            finally {
                // The timer only guards the fetch() call itself; it is cleared
                // before the response body is read.
                clearTimeout(voicesTimeoutId);
            }
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}`);
            }
            const data = (await response.json());
            // Fail with a readable message instead of an opaque TypeError when
            // the payload does not carry a voices array.
            if (!Array.isArray(data?.voices)) {
                throw new Error("Unexpected response: missing voices array");
            }
            let voices = data.voices.map((voice) => ({
                id: voice.voice_id,
                name: voice.name,
                languageCode: "en",
                // ElevenLabs supports multiple languages per voice
                languageCodes: [
                    "en",
                    "es",
                    "fr",
                    "de",
                    "it",
                    "pt",
                    "pl",
                    "hi",
                    "ar",
                    "zh",
                    "ja",
                    "ko",
                ],
                gender: this.mapGender(voice.labels?.gender),
                type: "neural",
                description: voice.labels?.description,
            }));
            // Filter by language if specified
            if (languageCode) {
                const requested = languageCode.toLowerCase();
                const requestedBase = requested.split("-")[0];
                voices = voices.filter((v) => v.languageCodes?.some((code) => {
                    const c = code.toLowerCase();
                    return (c === requested ||
                        c === requestedBase ||
                        c.startsWith(requestedBase));
                }));
            }
            // Cache voices
            if (!languageCode) {
                this.voicesCache = { voices, timestamp: Date.now() };
            }
            return voices;
        }
        catch (err) {
            // Already a fully-described TTSError (e.g. the timeout above):
            // rethrow as-is so it is not wrapped a second time, matching
            // synthesize().
            if (err instanceof TTSError) {
                throw err;
            }
            const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
            logger.error(`[ElevenLabsTTSHandler] Failed to get voices: ${errorMessage}`);
            throw new TTSError({
                code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
                message: `Failed to get voices: ${errorMessage}`,
                category: ErrorCategory.NETWORK,
                severity: ErrorSeverity.MEDIUM,
                retriable: true,
                originalError: err instanceof Error ? err : undefined,
            });
        }
    }
    /**
     * Convert text to speech.
     *
     * @param {string} text - Text to synthesize.
     * @param {object} [options] - Synthesis options. Recognised keys:
     *   `voice` (voice ID, defaults to the Rachel voice), `format`
     *   (`"mp3"` by default), `model`, `stability`, `similarityBoost`,
     *   `style` and `useSpeakerBoost`.
     * @returns {Promise<object>} Result with `buffer`, `format`, `size`,
     *   `voice`, `sampleRate` and `metadata`.
     * @throws {TTSError} `PROVIDER_NOT_CONFIGURED` when no API key is set;
     *   `SYNTHESIS_FAILED` on timeout, non-2xx status, or any other failure.
     */
    async synthesize(text, options = {}) {
        if (!this.apiKey) {
            throw new TTSError({
                code: TTS_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
                message: "ElevenLabs API key not configured",
                category: ErrorCategory.CONFIGURATION,
                severity: ErrorSeverity.HIGH,
                retriable: false,
            });
        }
        const startTime = Date.now();
        const elevenOptions = options;
        try {
            // Get voice ID (use default if not specified)
            const voiceId = options.voice ?? "21m00Tcm4TlvDq8ikWAM"; // Rachel voice as default
            // Determine model
            const model = elevenOptions.model ?? "eleven_multilingual_v2";
            // Build request body
            const requestBody = {
                text,
                model_id: model,
                voice_settings: {
                    stability: elevenOptions.stability ?? 0.5,
                    similarity_boost: elevenOptions.similarityBoost ?? 0.75,
                    style: elevenOptions.style ?? 0.0,
                    use_speaker_boost: elevenOptions.useSpeakerBoost ?? true,
                },
            };
            // Determine output format
            const outputFormat = this.mapFormat(options.format ?? "mp3");
            // The voice ID is caller-supplied; encode it so characters such as
            // "/", "?" or "#" cannot alter the request path or query string.
            const url = `${this.baseUrl}/text-to-speech/${encodeURIComponent(voiceId)}?output_format=${outputFormat}`;
            const controller = new AbortController();
            const timeoutId = setTimeout(() => controller.abort(), 30000);
            let response;
            try {
                response = await fetch(url, {
                    method: "POST",
                    headers: {
                        "xi-api-key": this.apiKey,
                        "Content-Type": "application/json",
                    },
                    body: JSON.stringify(requestBody),
                    signal: controller.signal,
                });
            }
            catch (fetchErr) {
                if (fetchErr instanceof Error && fetchErr.name === "AbortError") {
                    throw new TTSError({
                        code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
                        message: "ElevenLabs TTS request timed out after 30 seconds",
                        category: ErrorCategory.NETWORK,
                        severity: ErrorSeverity.HIGH,
                        retriable: true,
                        originalError: fetchErr,
                    });
                }
                throw fetchErr;
            }
            finally {
                clearTimeout(timeoutId);
            }
            if (!response.ok) {
                // The error body may not be JSON; fall back to an empty object
                // so the status code is still reported.
                const errorData = await response
                    .json()
                    .catch(() => Object.create(null));
                const errorMessage = errorData.detail?.message || `HTTP ${response.status}`;
                throw new Error(errorMessage);
            }
            // Measured once the response headers arrive, before the audio body
            // is read.
            const latency = Date.now() - startTime;
            // Get audio buffer
            const arrayBuffer = await response.arrayBuffer();
            const audioBuffer = Buffer.from(arrayBuffer);
            const result = {
                buffer: audioBuffer,
                // Use the *effective* output format from outputFormat, not the
                // requested format — otherwise unsupported requests that fell back to
                // mp3_44100_128 would mislabel the buffer (Copilot review).
                format: this.effectiveFormat(outputFormat),
                size: audioBuffer.length,
                voice: voiceId,
                sampleRate: this.getSampleRate(outputFormat),
                metadata: {
                    latency,
                    provider: "elevenlabs-tts",
                    model,
                    requestedFormat: options.format,
                    outputFormat,
                },
            };
            logger.info(`[ElevenLabsTTSHandler] Synthesized ${audioBuffer.length} bytes in ${latency}ms`);
            return result;
        }
        catch (err) {
            if (err instanceof TTSError) {
                throw err;
            }
            const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
            logger.error(`[ElevenLabsTTSHandler] Synthesis failed: ${errorMessage}`);
            throw new TTSError({
                code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
                message: `Synthesis failed: ${errorMessage}`,
                category: ErrorCategory.EXECUTION,
                severity: ErrorSeverity.HIGH,
                retriable: true,
                context: { textLength: text.length },
                originalError: err instanceof Error ? err : undefined,
            });
        }
    }
    /**
     * Map gender string to standard type
     *
     * @param {string} [gender] - Free-form gender label from the API.
     * @returns {"male"|"female"|"neutral"} `"neutral"` when the label is
     *   missing or matches neither gender.
     */
    mapGender(gender) {
        if (!gender) {
            return "neutral";
        }
        const lower = gender.toLowerCase();
        // "female" contains "male", so the male check must exclude it.
        if (lower.includes("male") && !lower.includes("female")) {
            return "male";
        }
        if (lower.includes("female")) {
            return "female";
        }
        return "neutral";
    }
    /**
     * Map TTSAudioFormat to ElevenLabs output format
     *
     * @param {string} format - Requested format (`mp3`, `wav`, `ogg`, `opus`).
     * @returns {string} ElevenLabs `output_format` value; `mp3_44100_128` for
     *   any unrecognised input.
     */
    mapFormat(format) {
        // NOTE(review): confirm "ogg_22050" against the ElevenLabs list of
        // supported output_format values.
        const formats = {
            mp3: "mp3_44100_128",
            wav: "pcm_44100",
            ogg: "ogg_22050",
            opus: "ogg_22050",
        };
        // Own-property check: a bare `formats[format]` would resolve inherited
        // keys such as "constructor" or "toString" to functions instead of
        // falling back to the default.
        return Object.hasOwn(formats, format) ? formats[format] : "mp3_44100_128";
    }
    /**
     * Get sample rate from format string
     *
     * @param {string} format - ElevenLabs `output_format` value.
     * @returns {number} Sample rate in Hz; 44100 when none is recognised.
     */
    getSampleRate(format) {
        if (format.includes("44100")) {
            return 44100;
        }
        if (format.includes("22050")) {
            return 22050;
        }
        if (format.includes("24000")) {
            return 24000;
        }
        return 44100;
    }
    /**
     * Map the ElevenLabs `output_format` string back to a canonical
     * TTSAudioFormat. mapFormat() falls back to mp3_44100_128 for unsupported
     * inputs, so this is needed to keep TTSResult.format honest.
     *
     * NOTE: ElevenLabs `pcm_*` outputs are RAW 16-bit signed-LE PCM samples
     * with no RIFF/WAV header. We surface that as `pcm16` (which exists in the
     * `TTSAudioFormat` union exactly for this case) — labeling it as `wav`
     * would cause consumers writing the buffer to a `.wav` file or feeding it
     * to a WAV parser to produce unplayable output (CodeRabbit review).
     *
     * @param {string} outputFormat - ElevenLabs `output_format` value.
     * @returns {"mp3"|"pcm16"|"opus"} Canonical format; `"mp3"` by default.
     */
    effectiveFormat(outputFormat) {
        if (outputFormat.startsWith("mp3")) {
            return "mp3";
        }
        if (outputFormat.startsWith("pcm")) {
            return "pcm16";
        }
        if (outputFormat.startsWith("ogg")) {
            return "opus";
        }
        return "mp3";
    }
}
//# sourceMappingURL=ElevenLabsTTS.js.map