@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
183 lines (182 loc) • 6.62 kB
JavaScript
/**
* Fish Audio TTS Handler
*
* Implementation of TTS using Fish Audio API. Lower-cost alternative to
* ElevenLabs with strong multilingual support and 15s voice cloning.
*
* @module voice/providers/FishAudioTTS
* @see https://docs.fish.audio/text-to-speech/text-to-speech
*/
import { ErrorCategory, ErrorSeverity } from "../../constants/enums.js";
import { logger } from "../../utils/logger.js";
import { TTS_ERROR_CODES, TTSError } from "../../utils/ttsProcessor.js";
const DEFAULT_BASE_URL = "https://api.fish.audio";
const REQUEST_TIMEOUT_MS = 30_000;
/**
* Default reference voice — "Energetic Male" by official author `lengyue`,
* a long-standing public English voice on Fish Audio.
*
* Sent as `reference_id` whenever the caller does not pass `options.voice`.
*
* @see https://fish.audio (model id 802e3bc2b27e49c2995d23ef70e6ac89)
*
* Note: the previous default `fb6c0e1ea91e427fb9a93b9bbf0a1e4d` was
* removed upstream and started returning 400 "Reference not found".
*/
const DEFAULT_REFERENCE_ID = "802e3bc2b27e49c2995d23ef70e6ac89";
/**
 * Fish Audio Text-to-Speech Handler.
 *
 * Auth: `Authorization: Bearer ${FISH_AUDIO_API_KEY}`.
 * Models: speech-1.5 (standard), speech-1.6, s1 (default; latest). The model
 * is selected with the `model` request header.
 */
export class FishAudioTTS {
  /** Text length limit exposed to callers; `synthesize` itself does not enforce it. */
  maxTextLength = 5000;
  /** Trimmed API key, or `null` when none was supplied. */
  apiKey;
  /** API origin with trailing slashes removed. */
  baseUrl;

  /**
   * @param {string} [apiKey] - Fish Audio API key. Falls back to the
   *   `FISH_AUDIO_API_KEY` environment variable; a blank value counts as unset.
   */
  constructor(apiKey) {
    const resolvedKey = (apiKey ?? process.env.FISH_AUDIO_API_KEY ?? "").trim();
    this.apiKey = resolvedKey.length > 0 ? resolvedKey : null;

    // A blank FISH_AUDIO_BASE_URL is treated like an unset one (mirroring the
    // API key handling); otherwise requests would go to the relative URL "/v1/tts".
    const configuredUrl = (process.env.FISH_AUDIO_BASE_URL ?? "")
      .trim()
      .replace(/\/+$/, "");
    this.baseUrl = configuredUrl.length > 0 ? configuredUrl : DEFAULT_BASE_URL;
  }

  /**
   * @returns {boolean} `true` when an API key is available.
   */
  isConfigured() {
    return this.apiKey !== null;
  }

  /**
   * Synthesize `text` into audio with a single POST to `${baseUrl}/v1/tts`.
   *
   * @param {string} text - Text to speak.
   * @param {object} [options]
   * @param {string} [options.voice] - Fish Audio reference id; defaults to `DEFAULT_REFERENCE_ID`.
   * @param {string} [options.format] - `mp3` (default), `wav` or `pcm16`; anything else falls back to `mp3`.
   * @param {string} [options.model] - Model name; reported as `s1` in the result metadata when omitted.
   * @param {string} [options.latency] - Overrides the default `"normal"` latency mode.
   * @param {number} [options.mp3Bitrate] - Overrides the default MP3 bitrate of 128.
   * @returns {Promise<object>} `{ buffer, format, size, voice, sampleRate, metadata }`.
   * @throws {TTSError} When no API key is configured, the request fails or
   *   times out, the API answers with a non-2xx status, or the audio body
   *   cannot be read.
   */
  async synthesize(text, options = {}) {
    if (!this.apiKey) {
      throw new TTSError({
        code: TTS_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
        message: "FISH_AUDIO_API_KEY not configured",
        category: ErrorCategory.CONFIGURATION,
        severity: ErrorSeverity.HIGH,
        retriable: false,
      });
    }

    const startTime = Date.now();
    const referenceId = options.voice ?? DEFAULT_REFERENCE_ID;
    const requestedFormat = options.format ?? "mp3";
    const upstreamFormat = this.mapFormat(requestedFormat);
    const fishOpts = options;

    const body = {
      text,
      reference_id: referenceId,
      format: upstreamFormat,
      mp3_bitrate: 128,
      chunk_length: 200,
      normalize: true,
      latency: "normal",
    };
    const headers = {
      Authorization: `Bearer ${this.apiKey}`,
      "Content-Type": "application/json",
    };
    if (fishOpts.model) {
      // Fish Audio picks the model from the `model` request header.
      headers.model = fishOpts.model;
      // NOTE(review): the body field is kept exactly as before so the payload
      // stays unchanged; confirm whether it can be dropped.
      body.model = fishOpts.model;
    }
    if (fishOpts.latency) {
      body.latency = fishOpts.latency;
    }
    if (fishOpts.mp3Bitrate !== undefined) {
      body.mp3_bitrate = fishOpts.mp3Bitrate;
    }

    // The timer is cleared as soon as `fetch` settles, so it bounds the wait
    // for the response to start, not the download of the audio body.
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
    let response;
    try {
      response = await fetch(`${this.baseUrl}/v1/tts`, {
        method: "POST",
        headers,
        body: JSON.stringify(body),
        signal: controller.signal,
      });
    } catch (err) {
      const timedOut = err instanceof Error && err.name === "AbortError";
      const message = timedOut
        ? `Fish Audio request timed out after ${REQUEST_TIMEOUT_MS / 1000}s`
        : `Fish Audio network error: ${err instanceof Error ? err.message : String(err)}`;
      throw this.toNetworkError(message, err);
    } finally {
      clearTimeout(timeoutId);
    }

    if (!response.ok) {
      // Reading the error body is best effort: a failure here must not hide
      // the HTTP status, which is the more useful diagnostic.
      let errorBody;
      try {
        errorBody = await response.text();
      } catch (readErr) {
        const detail = readErr instanceof Error ? readErr.message : String(readErr);
        errorBody = `<response body unreadable: ${detail}>`;
      }
      const retriable =
        response.status === 408 ||
        response.status === 429 ||
        response.status >= 500;
      throw new TTSError({
        code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
        message: `Fish Audio synthesis failed: ${response.status} — ${errorBody}`,
        category: retriable ? ErrorCategory.NETWORK : ErrorCategory.EXECUTION,
        severity: ErrorSeverity.HIGH,
        retriable,
        context: { status: response.status, referenceId, upstreamFormat },
      });
    }

    // A connection dropped mid-download rejects here; surface it as a
    // retriable TTSError like every other transport failure.
    let audioBuffer;
    try {
      audioBuffer = Buffer.from(await response.arrayBuffer());
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err);
      throw this.toNetworkError(`Fish Audio response read failed: ${detail}`, err);
    }

    const latency = Date.now() - startTime;
    const effectiveFormat = this.effectiveFormat(upstreamFormat);
    const result = {
      buffer: audioBuffer,
      format: effectiveFormat,
      size: audioBuffer.length,
      voice: referenceId,
      sampleRate: this.getSampleRate(effectiveFormat),
      metadata: {
        latency,
        provider: "fish-audio",
        model: fishOpts.model ?? "s1",
        requestedFormat: options.format,
        upstreamFormat,
      },
    };
    logger.info(`[FishAudioTTS] Synthesized ${audioBuffer.length} bytes in ${latency}ms`);
    return result;
  }

  /**
   * Build the retriable, network-category `TTSError` used for transport failures.
   *
   * @param {string} message - Final error message.
   * @param {unknown} err - Underlying failure; attached as `originalError` when it is an `Error`.
   * @returns {TTSError}
   */
  toNetworkError(message, err) {
    return new TTSError({
      code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
      message,
      category: ErrorCategory.NETWORK,
      severity: ErrorSeverity.HIGH,
      retriable: true,
      originalError: err instanceof Error ? err : undefined,
    });
  }

  /**
   * Translate a caller-facing format name into the value sent upstream.
   *
   * @param {string} format - Requested format.
   * @returns {string} `"mp3"`, `"wav"` or `"pcm"`; unsupported input logs a
   *   warning and yields `"mp3"`.
   */
  mapFormat(format) {
    const supported = {
      mp3: "mp3",
      wav: "wav",
      pcm16: "pcm",
    };
    // Own-property check: a bare `supported[format]` would return inherited
    // members such as `constructor` or `toString` for those format strings.
    if (!Object.hasOwn(supported, format)) {
      logger.warn(`[FishAudioTTS] Unsupported format "${format}" — falling back to "mp3"`);
      return "mp3";
    }
    return supported[format];
  }

  /**
   * Translate the upstream format back into the caller-facing name.
   *
   * @param {string} upstreamFormat - Value produced by `mapFormat`.
   * @returns {string} `"mp3"`, `"wav"` or `"pcm16"`; unknown values map to `"mp3"`.
   */
  effectiveFormat(upstreamFormat) {
    switch (upstreamFormat) {
      case "wav":
        return "wav";
      case "pcm":
        return "pcm16";
      default:
        return "mp3";
    }
  }

  /**
   * Sample rate reported for synthesized audio.
   *
   * @param {string} format - Caller-facing format; currently does not affect the result.
   * @returns {number} Always 44100.
   */
  getSampleRate(format) {
    // NOTE(review): every format reports 44.1 kHz because no sample rate is
    // requested upstream; presumably this matches the API default — confirm.
    return 44_100;
  }
}