UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio

183 lines (182 loc) 6.62 kB
/**
 * Fish Audio TTS Handler
 *
 * Implementation of TTS using Fish Audio API. Lower-cost alternative to
 * ElevenLabs with strong multilingual support and 15s voice cloning.
 *
 * @module voice/providers/FishAudioTTS
 * @see https://docs.fish.audio/text-to-speech/text-to-speech
 */
import { ErrorCategory, ErrorSeverity } from "../../constants/enums.js";
import { logger } from "../../utils/logger.js";
import { TTS_ERROR_CODES, TTSError } from "../../utils/ttsProcessor.js";

const DEFAULT_BASE_URL = "https://api.fish.audio";
const REQUEST_TIMEOUT_MS = 30_000;

/** Model requested (and reported in result metadata) when the caller does not pick one. */
const DEFAULT_MODEL = "s1";

/**
 * Default reference voice — "Energetic Male" by official author `lengyue`,
 * a long-standing public English voice on Fish Audio.
 *
 * @see https://fish.audio (model id 802e3bc2b27e49c2995d23ef70e6ac89)
 *
 * Note: the previous default `fb6c0e1ea91e427fb9a93b9bbf0a1e4d` was
 * removed upstream and started returning 400 "Reference not found".
 */
const DEFAULT_REFERENCE_ID = "802e3bc2b27e49c2995d23ef70e6ac89";

/** Caller-facing format name -> format name sent to Fish Audio. */
const UPSTREAM_FORMATS = Object.freeze({
  mp3: "mp3",
  wav: "wav",
  pcm16: "pcm",
});

/**
 * Wrap a transport-level failure (rejected fetch, aborted or broken body
 * stream) in a retriable TTSError.
 *
 * @param {unknown} err - Value thrown by `fetch` or by reading the response body.
 * @param {boolean} timedOut - True when the request timer aborted the request.
 * @returns {TTSError} Error with category NETWORK and `retriable: true`.
 */
function toTransportError(err, timedOut) {
  const cause = err instanceof Error ? err : undefined;
  const message = timedOut
    ? `Fish Audio request timed out after ${REQUEST_TIMEOUT_MS / 1000}s`
    : `Fish Audio network error: ${cause ? cause.message : String(err)}`;
  return new TTSError({
    code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
    message,
    category: ErrorCategory.NETWORK,
    severity: ErrorSeverity.HIGH,
    retriable: true,
    originalError: cause,
  });
}

/**
 * Build a TTSError describing a non-2xx response.
 *
 * @param {Response} response - The failed HTTP response.
 * @param {{ referenceId: string, upstreamFormat: string }} context - Request
 *   details copied into the error's `context`.
 * @returns {Promise<TTSError>} Error flagged retriable for 408, 429 and 5xx.
 */
async function toHttpError(response, context) {
  // Best effort: the status code alone is still actionable when the error
  // body cannot be read, so a failed read must not mask the HTTP failure.
  const detail = (await response.text().catch(() => "")).trim();
  const { status } = response;
  const retriable = status === 408 || status === 429 || status >= 500;
  const summary = `Fish Audio synthesis failed: ${status}`;
  return new TTSError({
    code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
    message: detail ? `${summary} - ${detail}` : summary,
    category: retriable ? ErrorCategory.NETWORK : ErrorCategory.EXECUTION,
    severity: ErrorSeverity.HIGH,
    retriable,
    context: { status, ...context },
  });
}

/**
 * Fish Audio Text-to-Speech Handler.
 *
 * Auth: `Authorization: Bearer ${FISH_AUDIO_API_KEY}`.
 * Models: speech-1.5 (standard), speech-1.6, s1 (default; latest).
 */
export class FishAudioTTS {
  /**
   * Maximum input length advertised by this handler. It is not enforced
   * inside this class — presumably the caller checks it; TODO confirm.
   */
  maxTextLength = 5000;
  /** Trimmed API key, or `null` when none was supplied. */
  apiKey;
  /** API origin without trailing slashes. */
  baseUrl;

  /**
   * @param {string} [apiKey] - API key; falls back to the
   *   `FISH_AUDIO_API_KEY` environment variable when null/undefined.
   */
  constructor(apiKey) {
    const resolved = (apiKey ?? process.env.FISH_AUDIO_API_KEY ?? "").trim();
    this.apiKey = resolved.length > 0 ? resolved : null;
    // Strip every trailing slash so `${baseUrl}/v1/tts` never contains `//`.
    this.baseUrl = (process.env.FISH_AUDIO_BASE_URL ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
  }

  /**
   * @returns {boolean} True when a non-empty API key is available.
   */
  isConfigured() {
    return this.apiKey !== null;
  }

  /**
   * Synthesize speech for `text` through `POST {baseUrl}/v1/tts`.
   *
   * @param {string} text - Text to speak.
   * @param {object} [options] - Synthesis options.
   * @param {string} [options.voice] - Fish Audio reference id; defaults to
   *   `DEFAULT_REFERENCE_ID`.
   * @param {string} [options.format] - "mp3" (default), "wav" or "pcm16";
   *   anything else falls back to "mp3" with a warning.
   * @param {string} [options.model] - Fish Audio model; defaults to "s1".
   * @param {string} [options.latency] - Overrides the "normal" latency mode.
   * @param {number} [options.mp3Bitrate] - Overrides the 128 default bitrate.
   * @returns {Promise<object>} `{ buffer, format, size, voice, sampleRate, metadata }`.
   * @throws {TTSError} PROVIDER_NOT_CONFIGURED without an API key;
   *   SYNTHESIS_FAILED on timeout, network failure or a non-2xx response.
   */
  async synthesize(text, options = {}) {
    if (!this.apiKey) {
      throw new TTSError({
        code: TTS_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
        message: "FISH_AUDIO_API_KEY not configured",
        category: ErrorCategory.CONFIGURATION,
        severity: ErrorSeverity.HIGH,
        retriable: false,
      });
    }

    const startTime = Date.now();
    const referenceId = options.voice ?? DEFAULT_REFERENCE_ID;
    const requestedFormat = options.format ?? "mp3";
    const upstreamFormat = this.mapFormat(requestedFormat);
    // One resolved value feeds both the request and the result metadata, so
    // the metadata always reports the model that was actually requested.
    const model = options.model || DEFAULT_MODEL;

    const body = {
      text,
      reference_id: referenceId,
      format: upstreamFormat,
      mp3_bitrate: 128,
      chunk_length: 200,
      normalize: true,
      latency: "normal",
    };
    if (options.latency) {
      body.latency = options.latency;
    }
    if (options.mp3Bitrate !== undefined) {
      body.mp3_bitrate = options.mp3Bitrate;
    }

    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
    let audioBuffer;
    try {
      const response = await fetch(`${this.baseUrl}/v1/tts`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${this.apiKey}`,
          "Content-Type": "application/json",
          // Fish Audio selects the model through the `model` request header
          // (see the /v1/tts API reference), not through a JSON body field.
          model,
        },
        body: JSON.stringify(body),
        signal: controller.signal,
      });
      if (!response.ok) {
        throw await toHttpError(response, { referenceId, upstreamFormat });
      }
      // The timer stays armed while the body downloads: fetch() resolves as
      // soon as headers arrive, and a stalled audio stream must still abort.
      audioBuffer = Buffer.from(await response.arrayBuffer());
    } catch (err) {
      if (err instanceof TTSError) {
        throw err;
      }
      // Only the timer above ever aborts this controller, so an aborted
      // signal means the request timed out.
      const timedOut =
        controller.signal.aborted || (err instanceof Error && err.name === "AbortError");
      throw toTransportError(err, timedOut);
    } finally {
      clearTimeout(timeoutId);
    }

    const latency = Date.now() - startTime;
    const effectiveFormat = this.effectiveFormat(upstreamFormat);
    const result = {
      buffer: audioBuffer,
      format: effectiveFormat,
      size: audioBuffer.length,
      voice: referenceId,
      sampleRate: this.getSampleRate(effectiveFormat),
      metadata: {
        latency,
        provider: "fish-audio",
        model,
        requestedFormat: options.format,
        upstreamFormat,
      },
    };
    logger.info(`[FishAudioTTS] Synthesized ${audioBuffer.length} bytes in ${latency}ms`);
    return result;
  }

  /**
   * Translate a caller-facing format into the name sent upstream.
   *
   * @param {string} format - "mp3", "wav" or "pcm16".
   * @returns {string} "mp3", "wav" or "pcm"; unsupported input yields "mp3".
   */
  mapFormat(format) {
    // Own-property check: a plain lookup would accept inherited keys such as
    // "constructor" and hand back a function instead of a format name.
    if (!Object.hasOwn(UPSTREAM_FORMATS, format)) {
      logger.warn(`[FishAudioTTS] Unsupported format "${format}" — falling back to "mp3"`);
      return "mp3";
    }
    return UPSTREAM_FORMATS[format];
  }

  /**
   * Translate an upstream format name back into the caller-facing one.
   *
   * @param {string} upstreamFormat - "mp3", "wav" or "pcm".
   * @returns {string} "mp3", "wav" or "pcm16"; unknown input yields "mp3".
   */
  effectiveFormat(upstreamFormat) {
    switch (upstreamFormat) {
      case "wav":
        return "wav";
      case "pcm":
        return "pcm16";
      default:
        return "mp3";
    }
  }

  /**
   * Sample rate reported for synthesized audio.
   *
   * @param {string} format - Caller-facing format; currently every format
   *   reports the same rate.
   * @returns {number} 44100.
   */
  getSampleRate(format) {
    return 44_100;
  }
}