audiopod-sdk
Version:
AudioPod SDK for Node.js and React - Professional Audio Processing powered by AI
1,315 lines (1,311 loc) • 83.5 kB
JavaScript
import axios from 'axios';
import FormData from 'form-data';
import { useState, useEffect, useRef, useCallback } from 'react';
/**
* AudioPod SDK Types
* Type definitions for the AudioPod API
*/
var JobStatus;
(function (JobStatus) {
JobStatus["PENDING"] = "pending";
JobStatus["PROCESSING"] = "processing";
JobStatus["COMPLETED"] = "completed";
JobStatus["FAILED"] = "failed";
JobStatus["CANCELLED"] = "cancelled";
})(JobStatus || (JobStatus = {}));
var VoiceType;
(function (VoiceType) {
VoiceType["CUSTOM"] = "custom";
VoiceType["STANDARD"] = "standard";
})(VoiceType || (VoiceType = {}));
var TTSProvider;
(function (TTSProvider) {
TTSProvider["AUDIOPOD_SONIC"] = "audiopod_sonic";
TTSProvider["OPENAI"] = "openai";
TTSProvider["GOOGLE_GEMINI"] = "google_gemini";
})(TTSProvider || (TTSProvider = {}));
/**
* Voice Service
* Handles voice cloning and TTS operations
*/
class VoiceService {
constructor(client) {
this.client = client;
}
/**
* Clone a voice from an audio file
* This method first creates a voice profile, then uses it for cloning
*/
async cloneVoice(request) {
const { voiceFile, text, language, speed = 1.0, waitForCompletion = false, timeout = 300000 } = request;
// Validate inputs
if (!text || text.trim().length === 0) {
throw new Error('Text input cannot be empty');
}
// Step 1: Create a temporary voice profile from the uploaded file
const voiceProfileName = `temp_voice_${Date.now()}`;
const voiceProfile = await this.createVoiceProfile(
    voiceProfileName,
    voiceFile,
    'Temporary voice profile for cloning',
    false, // not public
    true, // wait for completion
    timeout
);
// Step 2: Use the voice profile for cloning
const data = {
voice_id: voiceProfile.id,
input_text: text.trim(),
target_language: language?.toLowerCase(),
generation_params: speed !== 1.0 ? { speed } : undefined
};
const job = await this.client.post('/voice/voice-clone', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
duration: result.duration
};
}
return { job };
}
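/**
 * Example (illustrative sketch only) for cloneVoice above: clone a voice from
 * a local sample and wait for the rendered audio. Assumes `client` is the
 * SDK's configured API client (constructed elsewhere; not shown in this
 * excerpt), and the file path is a placeholder.
 *
 *   const voice = new VoiceService(client);
 *   const { job, outputUrl, duration } = await voice.cloneVoice({
 *     voiceFile: './sample.wav',
 *     text: 'Hello from AudioPod!',
 *     language: 'en',
 *     waitForCompletion: true
 *   });
 *   console.log(job.status, outputUrl, duration);
 */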
/**
* Create a reusable voice profile
*/
async createVoiceProfile(name, voiceFile, description, isPublic = false, waitForCompletion = false, timeout = 600000) {
if (!name || name.trim().length === 0) {
throw new Error('Voice profile name cannot be empty');
}
if (name.length > 100) {
throw new Error('Voice profile name too long (max 100 characters)');
}
let fileData;
if (typeof voiceFile === 'string') {
const fs = await import('fs');
const path = await import('path');
if (!fs.existsSync(voiceFile)) {
throw new Error(`Voice file not found: ${voiceFile}`);
}
const fileBuffer = fs.readFileSync(voiceFile);
const fileName = path.basename(voiceFile);
fileData = new Blob([fileBuffer], { type: 'audio/*' });
fileData.name = fileName;
}
else {
fileData = voiceFile;
}
const formData = {
name: name.trim(),
is_public: isPublic
};
if (description) {
formData.description = description.trim();
}
const response = await this.client.uploadFile('/voice/voice-profiles', fileData, formData);
if (waitForCompletion) {
// Poll for voice profile completion
const startTime = Date.now();
while (Date.now() - startTime < timeout) {
const voiceProfile = await this.getVoiceProfile(response.id);
if (voiceProfile.status === 'completed') {
return voiceProfile;
}
else if (voiceProfile.status === 'failed') {
throw new Error(`Voice profile creation failed: ${voiceProfile.status}`);
}
await new Promise(resolve => setTimeout(resolve, 5000));
}
throw new Error('Voice profile creation timed out');
}
return response;
}
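/**
 * Example (sketch) for createVoiceProfile above: create a reusable profile
 * from a local file and poll until processing finishes. `voice` is the
 * VoiceService instance from the previous sketch; the name, file path, and
 * description are illustrative.
 *
 *   const profile = await voice.createVoiceProfile(
 *     'narrator',
 *     './narrator-sample.wav',
 *     'House narrator voice',
 *     false, // not public
 *     true   // wait until processing completes
 *   );
 *   console.log(profile.id, profile.status);
 */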
/**
* Generate speech using an existing voice profile
*/
async generateSpeech(voiceId, text, options = {}) {
const { language, speed = 1.0, audioFormat = 'mp3', waitForCompletion = false, timeout = 300000 } = options;
if (!text || text.trim().length === 0) {
throw new Error('Text input cannot be empty');
}
if (speed < 0.5 || speed > 2.0) {
throw new Error('Speed must be between 0.5 and 2.0');
}
const data = {
input_text: text.trim(),
speed,
audio_format: audioFormat
};
if (language) {
data.language = language.toLowerCase();
}
const response = await this.client.post(`/voice/voices/${voiceId}/generate`, data);
if (response.job_id) {
// It's an async job
const job = { id: response.job_id, ...response };
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
duration: result.duration
};
}
return { job };
}
else {
// Direct response with audio URL
return {
job: { id: 0, status: 'completed' },
outputUrl: response.output_url || response.outputUrl,
duration: response.duration
};
}
}
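/**
 * Example (sketch) for generateSpeech above: synthesize speech with an
 * existing voice profile. `profile.id` refers to a previously created
 * profile; all values are illustrative.
 *
 *   const { outputUrl, duration } = await voice.generateSpeech(
 *     profile.id,
 *     'Welcome back to the show.',
 *     {
 *       language: 'en',
 *       speed: 1.1,
 *       audioFormat: 'mp3',
 *       waitForCompletion: true
 *     }
 *   );
 *   console.log(outputUrl, duration);
 */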
/**
* List available voice profiles
*/
async listVoiceProfiles(options = {}) {
const { voiceType, isPublic, includePublic = true, limit = 50, skip = 0 } = options;
const params = {
limit,
skip,
include_public: includePublic
};
if (voiceType) {
params.voice_type = voiceType;
}
if (isPublic !== undefined) {
params.is_public = isPublic;
}
return this.client.get('/voice/voice-profiles', params);
}
/**
* Get details of a specific voice profile
*/
async getVoiceProfile(voiceId) {
return this.client.get(`/voice/voice-profiles/${voiceId}`);
}
/**
* Delete a voice profile
*/
async deleteVoiceProfile(voiceId) {
await this.client.delete(`/voice/voices/${voiceId}`);
}
/**
* Get voice cloning job status
*/
async getJobStatus(jobId) {
return this.client.get(`/voice/clone/${jobId}/status`);
}
/**
* List voice cloning jobs
*/
async listCloningJobs(options = {}) {
const params = {};
if (options.skip !== undefined) {
params.skip = options.skip;
}
if (options.limit !== undefined) {
params.limit = options.limit;
}
if (options.status) {
params.status = options.status;
}
return this.client.get('/voice/clone/jobs', params);
}
/**
* Stream voice generation (WebSocket connection)
*/
async streamVoiceGeneration(voiceId, text, options = {}) {
const { language, speed = 1.0, onProgress, onAudioChunk, onComplete, onError } = options;
try {
const WebSocket = (await import('ws')).default;
const config = this.client.getConfig();
const wsUrl = `${config.baseURL.replace('http', 'ws')}/api/v1/voice/ws/stream`;
const ws = new WebSocket(wsUrl, {
headers: {
'Authorization': `Bearer ${config.apiKey}`
}
});
ws.on('open', () => {
const message = {
type: 'voice_generation',
data: {
voice_id: voiceId,
input_text: text,
target_language: language,
speed
}
};
ws.send(JSON.stringify(message));
});
ws.on('message', (data) => {
try {
const message = JSON.parse(data.toString());
switch (message.type) {
case 'progress':
if (onProgress) {
onProgress(message.data.progress);
}
break;
case 'audio_chunk':
if (onAudioChunk) {
const audioData = Buffer.from(message.data.chunk, 'base64');
// Slice to this chunk's own bytes: a Node Buffer may share a larger pooled ArrayBuffer.
onAudioChunk(audioData.buffer.slice(audioData.byteOffset, audioData.byteOffset + audioData.byteLength));
}
break;
case 'complete':
if (onComplete) {
onComplete(message.data);
}
ws.close();
break;
case 'error':
if (onError) {
onError(new Error(message.data.message));
}
ws.close();
break;
}
}
catch (error) {
if (onError) {
onError(error);
}
}
});
ws.on('error', (error) => {
if (onError) {
onError(error);
}
});
}
catch (error) {
if (onError) {
onError(error);
}
}
}
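/**
 * Example (sketch) for streamVoiceGeneration above: stream generation with
 * callbacks, appending audio chunks to a local file. Node-only (uses `fs` and
 * the `ws` package, as the method does); identifiers and the output path are
 * illustrative.
 *
 *   import { createWriteStream } from 'fs';
 *   const out = createWriteStream('./stream.raw');
 *   await voice.streamVoiceGeneration(profile.id, 'Streaming hello', {
 *     onProgress: (p) => console.log(`progress: ${p}%`),
 *     onAudioChunk: (chunk) => out.write(Buffer.from(chunk)),
 *     onComplete: () => out.end(),
 *     onError: (err) => console.error(err)
 *   });
 */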
/**
* Convert source audio to match target voice characteristics
*/
async convertVoice(request) {
const { sourceFile, url, voiceUuid, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!sourceFile && !url) {
throw new Error('Either sourceFile or url must be provided');
}
if (sourceFile && url) {
throw new Error('Cannot provide both sourceFile and url');
}
if (!voiceUuid || voiceUuid.trim().length === 0) {
throw new Error('Voice UUID cannot be empty');
}
let job;
if (url) {
// URL-based conversion
const formData = {
url,
voice_uuid: voiceUuid.trim()
};
job = await this.client.post('/voice/voice-convert', formData);
}
else {
// File-based conversion
let fileData;
if (typeof sourceFile === 'string') {
const fs = await import('fs');
const path = await import('path');
if (!fs.existsSync(sourceFile)) {
throw new Error(`Source audio file not found: ${sourceFile}`);
}
const fileBuffer = fs.readFileSync(sourceFile);
const fileName = path.basename(sourceFile);
fileData = new Blob([fileBuffer], { type: 'audio/*' });
fileData.name = fileName;
}
else {
fileData = sourceFile;
}
const formData = {
voice_uuid: voiceUuid.trim()
};
job = await this.client.uploadFile('/voice/voice-convert', fileData, formData);
}
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_path || result.outputUrl || result.output_url
};
}
return {
job,
outputUrl: undefined
};
}
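/**
 * Example (sketch) for convertVoice above: convert an existing recording to a
 * target voice. The voice UUID and file path are placeholders; `voice` is the
 * VoiceService instance from the earlier sketches.
 *
 *   const { outputUrl } = await voice.convertVoice({
 *     sourceFile: './interview.wav',
 *     voiceUuid: 'target-voice-uuid',
 *     waitForCompletion: true
 *   });
 *   console.log(outputUrl);
 */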
/**
* Generate multi-voice TTS with multiple speakers
*/
async generateMultiVoiceTTS(request) {
const { segments, mixMode = 'sequential', outputFormat = 'wav', silenceDuration = 0.5, normalizeVolume = true, generationParams = {}, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!segments || segments.length === 0) {
throw new Error('At least one segment must be provided');
}
if (segments.length > 50) {
throw new Error('Maximum 50 segments allowed');
}
for (const segment of segments) {
if (!segment.text || segment.text.trim().length === 0) {
throw new Error('Segment text cannot be empty');
}
if (!segment.voiceId) {
throw new Error('Voice ID must be specified for each segment');
}
}
if (!['sequential', 'parallel', 'timed'].includes(mixMode)) {
throw new Error('Mix mode must be sequential, parallel, or timed');
}
if (!['wav', 'mp3', 'ogg'].includes(outputFormat)) {
throw new Error('Output format must be wav, mp3, or ogg');
}
const formData = {
segments: JSON.stringify(segments),
mix_mode: mixMode,
output_format: outputFormat,
silence_duration: silenceDuration,
normalize_volume: normalizeVolume,
generation_params: JSON.stringify(generationParams)
};
const job = await this.client.post('/voice/multi-voice-tts', formData);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_path || result.outputUrl || result.output_url,
totalDuration: result.total_duration || result.totalDuration,
segmentsCount: result.segments_count || result.segmentsCount || segments.length,
uniqueVoicesCount: result.unique_voices_count || result.uniqueVoicesCount
};
}
return {
job,
outputUrl: undefined,
totalDuration: undefined,
segmentsCount: segments.length,
uniqueVoicesCount: new Set(segments.map(s => s.voiceId)).size
};
}
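/**
 * Example (sketch) for generateMultiVoiceTTS above: render a two-speaker
 * dialogue sequentially. The voice IDs are illustrative placeholders.
 *
 *   const result = await voice.generateMultiVoiceTTS({
 *     segments: [
 *       { voiceId: 101, text: 'Welcome to the podcast.' },
 *       { voiceId: 202, text: 'Thanks for having me!' }
 *     ],
 *     mixMode: 'sequential',
 *     outputFormat: 'mp3',
 *     silenceDuration: 0.4,
 *     waitForCompletion: true
 *   });
 *   console.log(result.outputUrl, result.totalDuration);
 */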
/**
* Create a voice collection
*/
async createVoiceCollection(request) {
const { name, description, isPublic = false, color, icon, voiceIds = [] } = request;
// Validate inputs
if (!name || name.trim().length === 0) {
throw new Error('Collection name cannot be empty');
}
if (name.length > 100) {
throw new Error('Collection name cannot exceed 100 characters');
}
if (description && description.length > 500) {
throw new Error('Description cannot exceed 500 characters');
}
if (color && !/^#[0-9A-Fa-f]{6}$/.test(color)) {
throw new Error('Color must be a valid hex color code (e.g., #FF0000)');
}
if (icon && icon.length > 50) {
throw new Error('Icon identifier cannot exceed 50 characters');
}
const data = {
name: name.trim(),
is_public: isPublic,
voice_ids: voiceIds
};
if (description) {
data.description = description.trim();
}
if (color) {
data.color = color;
}
if (icon) {
data.icon = icon;
}
const result = await this.client.post('/voice/collections', data);
return result;
}
/**
* List voice collections
*/
async listVoiceCollections(options = {}) {
const { includeVoices = false, includePublic = true } = options;
const params = {
include_voices: includeVoices,
include_public: includePublic
};
return this.client.get('/voice/collections', params);
}
/**
* Update a voice collection
*/
async updateVoiceCollection(collectionId, updates) {
const data = {};
if (updates.name !== undefined) {
if (!updates.name || updates.name.trim().length === 0) {
throw new Error('Collection name cannot be empty');
}
data.name = updates.name.trim();
}
if (updates.description !== undefined) {
data.description = updates.description?.trim();
}
if (updates.isPublic !== undefined) {
data.is_public = updates.isPublic;
}
if (updates.color !== undefined) {
if (updates.color && !/^#[0-9A-Fa-f]{6}$/.test(updates.color)) {
throw new Error('Color must be a valid hex color code');
}
data.color = updates.color;
}
if (updates.icon !== undefined) {
data.icon = updates.icon;
}
const result = await this.client.put(`/voice/collections/${collectionId}`, data);
return result;
}
/**
* Delete a voice collection
*/
async deleteVoiceCollection(collectionId) {
await this.client.delete(`/voice/collections/${collectionId}`);
}
/**
* Add voices to a collection
*/
async addVoicesToCollection(collectionId, voiceIds) {
if (!voiceIds || voiceIds.length === 0) {
throw new Error('At least one voice ID must be provided');
}
const data = {
voice_ids: voiceIds
};
await this.client.post(`/voice/collections/${collectionId}/voices`, data);
}
/**
* Remove voices from a collection
*/
async removeVoicesFromCollection(collectionId, voiceIds) {
if (!voiceIds || voiceIds.length === 0) {
throw new Error('At least one voice ID must be provided');
}
// Use post method to remove voices from collection
await this.client.post(`/voice/collections/${collectionId}/voices/remove`, {
voice_ids: voiceIds
});
}
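/**
 * Example (sketch) for the collection methods above: create a collection and
 * attach voices to it. Voice IDs and styling values are illustrative, and the
 * use of `collection.id` assumes the create response includes the new
 * collection's id.
 *
 *   const collection = await voice.createVoiceCollection({
 *     name: 'Podcast voices',
 *     description: 'Hosts and recurring guests',
 *     color: '#3366FF'
 *   });
 *   await voice.addVoicesToCollection(collection.id, [101, 202]);
 */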
/**
* Generate public voice preview (no authentication required)
*/
async generatePublicPreview(request) {
const { voiceIdentifier, inputText, targetLanguage = 'en' } = request;
// Validate inputs
if (!voiceIdentifier || voiceIdentifier.trim().length === 0) {
throw new Error('Voice identifier cannot be empty');
}
if (!inputText || inputText.trim().length === 0) {
throw new Error('Input text cannot be empty');
}
if (inputText.length > 2000) {
throw new Error('Input text cannot exceed 2000 characters');
}
const formData = {
voice_identifier: voiceIdentifier.trim(),
input_text: inputText.trim(),
target_language: targetLanguage
};
const result = await this.client.post('/voice/public/preview-generate', formData);
return {
audioUrl: result.audio_url || result.audioUrl,
duration: result.duration
};
}
/**
* Populate provider voices (admin only)
*/
async populateProviderVoices(provider) {
if (!['openai', 'google_gemini', 'audiopod_sonic'].includes(provider)) {
throw new Error('Invalid provider. Must be openai, google_gemini, or audiopod_sonic');
}
const result = await this.client.post(`/voice/voices/populate/${provider}`);
return {
message: result.message,
count: result.voices_created || result.count || 0
};
}
/**
* Get unified TTS job status (works for both single and multi-voice TTS)
*/
async getUnifiedJobStatus(jobId) {
return this.client.get(`/voice/tts-jobs/${jobId}/status`);
}
/**
* Get unified TTS history (both single and multi-voice jobs)
*/
async getUnifiedTTSHistory(options = {}) {
const { limit = 50, offset = 0, status, jobType } = options;
const params = {
limit,
offset
};
if (status) {
params.status = status;
}
if (jobType) {
params.job_type = jobType;
}
return this.client.get('/voice/tts-jobs/history', params);
}
}
/**
* Music Service
* Handles music generation operations
*/
class MusicService {
constructor(client) {
this.client = client;
}
/**
* Generate music from text prompt
*/
async generateMusic(request) {
const { prompt, duration = 120.0, guidanceScale = 7.5, numInferenceSteps = 50, seed, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Music prompt cannot be empty');
}
if (duration !== -1 && (duration < 1.0 || duration > 600.0)) {
throw new Error('Duration must be -1 (auto) or between 1 and 600 seconds');
}
if (guidanceScale < 1.0 || guidanceScale > 30.0) {
throw new Error('Guidance scale must be between 1.0 and 30.0');
}
if (numInferenceSteps < 1 || numInferenceSteps > 200) {
throw new Error('Inference steps must be between 1 and 200');
}
if (seed !== undefined && (seed < 0 || seed > Math.pow(2, 32) - 1)) {
throw new Error('Seed must be between 0 and 2^32 - 1');
}
const data = {
prompt: prompt.trim(),
audio_duration: duration,
guidance_scale: guidanceScale,
infer_step: numInferenceSteps,
lyrics: '',
format: 'wav',
scheduler_type: 'euler',
cfg_type: 'apg',
omega_scale: 10,
guidance_interval: 0.5,
guidance_interval_decay: 0.0,
min_guidance_scale: 3.0,
guidance_scale_text: 0.0,
guidance_scale_lyric: 0.0,
use_erg_tag: true,
use_erg_lyric: true,
use_erg_diffusion: true,
lora_name_or_path: 'none',
lora_weight: 1.0,
torch_compile: false,
cpu_offload: false,
overlapped_decode: false,
debug: false
};
if (seed !== undefined) {
data.manual_seeds = [seed];
}
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/text2music', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return { job };
}
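/**
 * Example (sketch) for generateMusic above: generate a track from a text
 * prompt and wait for the result. Assumes `client` is the configured API
 * client, as in the VoiceService sketches; prompt and seed are illustrative.
 *
 *   const music = new MusicService(client);
 *   const { outputUrl, audioDuration, actualSeeds } = await music.generateMusic({
 *     prompt: 'Dreamy lo-fi hip hop with warm vinyl crackle',
 *     duration: 90,
 *     seed: 12345,
 *     waitForCompletion: true
 *   });
 *   console.log(outputUrl, audioDuration, actualSeeds);
 */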
/**
* Generate rap music from prompt and lyrics
*/
async generateRap(request) {
const { prompt, lyrics, duration = 120.0, style, tempo, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Rap prompt cannot be empty');
}
if (!lyrics || lyrics.trim().length === 0) {
throw new Error('Rap lyrics cannot be empty');
}
if (duration !== -1 && (duration < 1.0 || duration > 600.0)) {
throw new Error('Duration must be -1 (auto) or between 1 and 600 seconds');
}
const data = {
prompt: prompt.trim(),
lyrics: lyrics.trim(),
audio_duration: duration,
format: 'wav',
// Use rap-specific LoRA by default
lora_name_or_path: 'ACE-Step/ACE-Step-v1-chinese-rap-LoRA',
lora_weight: 1.0
};
if (style) {
data.style = style;
}
if (tempo) {
data.tempo = tempo;
}
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/text2rap', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Generate instrumental music from prompt
*/
async generateInstrumental(request) {
const { prompt, duration = 120.0, instruments, key, tempo, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Instrumental prompt cannot be empty');
}
if (duration !== -1 && (duration < 1.0 || duration > 600.0)) {
throw new Error('Duration must be -1 (auto) or between 1 and 600 seconds');
}
const data = {
prompt: prompt.trim(),
audio_duration: duration,
format: 'wav',
lyrics: '' // Empty for instrumental
};
if (instruments) {
data.instruments = instruments;
}
if (key) {
data.key = key;
}
if (tempo) {
data.tempo = tempo;
}
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/prompt2instrumental', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Generate vocals from prompt and lyrics
*/
async generateVocals(request) {
const { prompt, lyrics, duration = 120.0, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Vocals prompt cannot be empty');
}
if (!lyrics || lyrics.trim().length === 0) {
throw new Error('Vocals lyrics cannot be empty');
}
if (duration !== -1 && (duration < 1.0 || duration > 600.0)) {
throw new Error('Duration must be -1 (auto) or between 1 and 600 seconds');
}
const data = {
prompt: prompt.trim(),
lyrics: lyrics.trim(),
audio_duration: duration,
format: 'wav'
};
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/lyric2vocals', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Generate audio samples from prompt
*/
async generateSamples(request) {
const { prompt, sampleType = 'loop', tempo = 128, duration = 8.0, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Samples prompt cannot be empty');
}
if (duration !== -1 && (duration < 1.0 || duration > 120.0)) {
throw new Error('Duration must be -1 (auto) or between 1 and 120 seconds for samples');
}
const data = {
prompt: prompt.trim(),
sample_type: sampleType,
tempo: tempo,
audio_duration: duration,
format: 'wav'
};
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/text2samples', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Transform audio to audio with new characteristics
*/
async generateAudio2Audio(request) {
const { sourceFile, prompt, refAudioStrength = 0.7, audioDuration = 120.0, inferStep = 60, guidanceScale = 15.0, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Audio2audio prompt cannot be empty');
}
let fileData;
if (typeof sourceFile === 'string') {
const fs = await import('fs');
const path = await import('path');
if (!fs.existsSync(sourceFile)) {
throw new Error(`Source audio file not found: ${sourceFile}`);
}
const fileBuffer = fs.readFileSync(sourceFile);
const fileName = path.basename(sourceFile);
fileData = new Blob([fileBuffer], { type: 'audio/*' });
fileData.name = fileName;
}
else {
fileData = sourceFile;
}
const formData = {
prompt: prompt.trim(),
ref_audio_strength: refAudioStrength,
audio_duration: audioDuration,
infer_step: inferStep,
guidance_scale: guidanceScale
};
if (displayName) {
formData.display_name = displayName.trim();
}
const job = await this.client.uploadFile('/music/audio2audio', fileData, formData);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Generate music using SongBloom with reference audio
*/
async generateSongBloom(request) {
const { lyrics, referenceFile, duration = 120.0, guidanceScale = 7.5, numInferenceSteps = 50, seed, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!lyrics || lyrics.trim().length === 0) {
throw new Error('SongBloom lyrics cannot be empty');
}
if (duration !== -1 && (duration < 10.0 || duration > 600.0)) {
throw new Error('Duration must be -1 (auto) or between 10 and 600 seconds for SongBloom');
}
let fileData;
if (typeof referenceFile === 'string') {
const fs = await import('fs');
const path = await import('path');
if (!fs.existsSync(referenceFile)) {
throw new Error(`Reference audio file not found: ${referenceFile}`);
}
const fileBuffer = fs.readFileSync(referenceFile);
const fileName = path.basename(referenceFile);
fileData = new Blob([fileBuffer], { type: 'audio/*' });
fileData.name = fileName;
}
else {
fileData = referenceFile;
}
const formData = {
lyrics: lyrics.trim(),
duration: duration,
guidance_scale: guidanceScale,
num_inference_steps: numInferenceSteps
};
if (seed !== undefined) {
formData.seed = seed;
}
if (displayName) {
formData.display_name = displayName.trim();
}
const job = await this.client.uploadFile('/music/songbloom', fileData, formData);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Create a retake/variation of existing music
*/
async retakeMusic(request) {
const { originalJobId, retakeVariance = 0.5, retakeSeeds, displayName, waitForCompletion = false, timeout = 600000 } = request;
const data = {
original_job_id: originalJobId,
retake_variance: retakeVariance
};
if (retakeSeeds) {
data.retake_seeds = retakeSeeds;
}
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/retake', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Repaint sections of existing music
*/
async repaintMusic(request) {
const { sourceJobId, repaintStart, repaintEnd, prompt, lyrics = '', displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Repaint prompt cannot be empty');
}
if (repaintEnd <= repaintStart) {
throw new Error('Repaint end time must be greater than start time');
}
const data = {
source_job_id: sourceJobId,
repaint_start: repaintStart,
repaint_end: repaintEnd,
prompt: prompt.trim(),
lyrics: lyrics.trim()
};
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/repaint', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Extend existing music with additional content
*/
async extendMusic(request) {
const { sourceJobId, leftExtendLength = 0.0, rightExtendLength = 30.0, prompt, lyrics = '', extendSeeds, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Extend prompt cannot be empty');
}
if (leftExtendLength < 0 || leftExtendLength > 60) {
throw new Error('Left extend length must be between 0 and 60 seconds');
}
if (rightExtendLength < 0 || rightExtendLength > 60) {
throw new Error('Right extend length must be between 0 and 60 seconds');
}
const data = {
source_job_id: sourceJobId,
left_extend_length: leftExtendLength,
right_extend_length: rightExtendLength,
prompt: prompt.trim(),
lyrics: lyrics.trim()
};
if (extendSeeds) {
data.extend_seeds = extendSeeds;
}
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/extend', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Edit existing music with new style or lyrics
*/
async editMusic(request) {
const { sourceJobId, editTargetPrompt, editTargetLyrics = '', editType = 'remix', editNMin = 0.0, editNMax = 1.0, editNAvg = 1, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!editTargetPrompt || editTargetPrompt.trim().length === 0) {
throw new Error('Edit target prompt cannot be empty');
}
if (!['remix', 'only_lyrics'].includes(editType)) {
throw new Error('Edit type must be "remix" or "only_lyrics"');
}
const data = {
source_job_id: sourceJobId,
edit_target_prompt: editTargetPrompt.trim(),
edit_target_lyrics: editTargetLyrics.trim(),
edit_type: editType,
edit_n_min: editNMin,
edit_n_max: editNMax,
edit_n_avg: editNAvg
};
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/edit', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
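/**
 * Example (sketch) covering the iteration methods above (retakeMusic,
 * extendMusic): produce a close variation of a completed job, then extend it.
 * `baseJob.id` refers to an existing completed music job; all values are
 * illustrative.
 *
 *   // Produce a close variation of the original
 *   const retake = await music.retakeMusic({
 *     originalJobId: baseJob.id,
 *     retakeVariance: 0.3,
 *     waitForCompletion: true
 *   });
 *
 *   // Then extend the variation by 30 seconds on the right
 *   const extended = await music.extendMusic({
 *     sourceJobId: retake.job.id,
 *     rightExtendLength: 30,
 *     prompt: 'Keep the same mood, add a soft outro',
 *     waitForCompletion: true
 *   });
 *   console.log(extended.outputUrl);
 */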
/**
* Get music generation job status
*/
async getJobStatus(jobId) {
return this.client.get(`/music/jobs/${jobId}/status`);
}
/**
* List music generation jobs
*/
async listJobs(options = {}) {
const { limit = 50, offset = 0, status } = options;
const params = {
limit,
offset
};
if (status) {
params.status = status;
}
return this.client.get('/music/jobs', params);
}
/**
* Like a music track
*/
async likeMusicTrack(jobId) {
const result = await this.client.post(`/music/jobs/${jobId}/like`);
return {
liked: result.liked,
totalLikes: result.total_likes || result.totalLikes
};
}
/**
* Unlike a music track
*/
async unlikeMusicTrack(jobId) {
const result = await this.client.delete(`/music/jobs/${jobId}/like`);
return {
liked: result.liked,
totalLikes: result.total_likes || result.totalLikes
};
}
/**
* Share a music track
*/
async shareMusicTrack(jobId, options = {}) {
const result = await this.client.post(`/music/jobs/${jobId}/share`, options);
return {
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl
};
}
/**
* Get music track statistics
*/
async getMusicTrackStats(jobId) {
return this.client.get(`/music/jobs/${jobId}/stats`);
}
/**
* Delete a music generation job
*/
async deleteMusicJob(jobId) {
await this.client.delete(`/music/jobs/${jobId}`);
}
/**
* Get shared music track (public access)
*/
async getSharedTrack(shareToken) {
const job = await this.client.get(`/music/share/${shareToken}`);
return {
job,
outputUrl: job.output_url || job.outputUrl,
outputUrls: job.output_urls || job.outputUrls,
audioDuration: job.audio_duration || job.audioDuration,
isShared: true,
shareToken,
shareUrl: job.share_url || job.shareUrl
};
}
}
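/**
 * Example (sketch) for the sharing methods above: share a finished track and
 * read it back via its public token. `music` is the MusicService instance
 * from the earlier sketch and `jobId` is an illustrative completed job id.
 *
 *   const { shareToken, shareUrl } = await music.shareMusicTrack(jobId);
 *   const shared = await music.getSharedTrack(shareToken);
 *   console.log(shareUrl, shared.outputUrl, shared.audioDuration);
 */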
/**
* Transcription Service
* Handles audio transcription operations
*/
class TranscriptionService {
constructor(client) {
this.client = client;
}
/**
* Transcribe audio to text
*/
async transcribeAudio(request) {
const { audioFile, language, modelType = 'whisperx', enableSpeakerDiarization = false, enableWordTimestamps = true, waitForCompletion = false, timeout = 600000 } = request;
let fileData;
if (typeof audioFile === 'string') {
const fs = await import('fs');
const path = await import('path');
if (!fs.existsSync(audioFile)) {
throw new Error(`Audio file not found: ${audioFile}`);
}
const fileBuffer = fs.readFileSync(audioFile);
const fileName = path.basename(audioFile);
fileData = new Blob([fileBuffer], { type: 'audio/*' });
fileData.name = fileName;
}
else {
fileData = audioFile;
}
const formData = {
model_type: modelType,
enable_speaker_diarization: enableSpeakerDiarization,
enable_word_timestamps: enableWordTimestamps
};
if (language) {
formData.language = language.toLowerCase();
}
const job = await this.client.uploadFile('/transcription/transcribe-upload', fileData, formData);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
transcript: result.transcript,
detectedLanguage: result.detected_language || result.detectedLanguage,
confidenceScore: result.confidence_score || result.confidenceScore,
segments: result.segments,
audioDuration: result.total_duration || result.audioDuration
};
}
return { job };
}
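/**
 * Example (sketch) for transcribeAudio above: transcribe a local recording
 * with speaker diarization enabled. Assumes `client` is the configured API
 * client, as in the earlier sketches; the file path is a placeholder.
 *
 *   const transcription = new TranscriptionService(client);
 *   const { transcript, detectedLanguage, segments } = await transcription.transcribeAudio({
 *     audioFile: './meeting.mp3',
 *     enableSpeakerDiarization: true,
 *     waitForCompletion: true
 *   });
 *   console.log(detectedLanguage, transcript, segments?.length);
 */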
/**
* Transcribe from URL
*/
async transcribeUrl(url, options = {}) {
const { language, modelType = 'whisperx', enableSpeakerDiarization = false, waitForCompletion = false, timeout = 600000 } = options;
const data = {
source_urls: [url],
model_type: modelType,
enable_speaker_diarization: enableSpeakerDiarization
};
if (language) {
data.language = language.toLowerCase();
}
const job = await this.client.post('/transcription/transcribe', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
transcript: result.transcript,
detectedLanguage: