audiopod-sdk
Version:
AudioPod SDK for Node.js and React - Professional Audio Processing powered by AI
1,315 lines (1,311 loc) • 83.5 kB
JavaScript
import axios from 'axios';
import FormData from 'form-data';
import { useState, useEffect, useRef, useCallback } from 'react';
/**
* AudioPod SDK Types
* Type definitions for the AudioPod API
*/
var JobStatus;
(function (JobStatus) {
JobStatus["PENDING"] = "pending";
JobStatus["PROCESSING"] = "processing";
JobStatus["COMPLETED"] = "completed";
JobStatus["FAILED"] = "failed";
JobStatus["CANCELLED"] = "cancelled";
})(JobStatus || (JobStatus = {}));
var VoiceType;
(function (VoiceType) {
VoiceType["CUSTOM"] = "custom";
VoiceType["STANDARD"] = "standard";
})(VoiceType || (VoiceType = {}));
var TTSProvider;
(function (TTSProvider) {
TTSProvider["AUDIOPOD_SONIC"] = "audiopod_sonic";
TTSProvider["OPENAI"] = "openai";
TTSProvider["GOOGLE_GEMINI"] = "google_gemini";
})(TTSProvider || (TTSProvider = {}));
/**
* Voice Service
* Handles voice cloning and TTS operations
*/
class VoiceService {
constructor(client) {
this.client = client;
}
/**
* Clone a voice from an audio file
* This method first creates a voice profile, then uses it for cloning
*/
async cloneVoice(request) {
const { voiceFile, text, language, speed = 1.0, waitForCompletion = false, timeout = 300000 } = request;
// Validate inputs
if (!text || text.trim().length === 0) {
throw new Error('Text input cannot be empty');
}
// Step 1: Create a temporary voice profile from the uploaded file
const voiceProfileName = `temp_voice_${Date.now()}`;
const voiceProfile = await this.createVoiceProfile(
    voiceProfileName,
    voiceFile,
    'Temporary voice profile for cloning',
    false, // not public
    true, // wait for completion
    timeout
);
// Step 2: Use the voice profile for cloning
const data = {
voice_id: voiceProfile.id,
input_text: text.trim(),
target_language: language?.toLowerCase(),
generation_params: speed !== 1.0 ? { speed } : undefined
};
const job = await this.client.post('/voice/voice-clone', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
duration: result.duration
};
}
return { job };
}
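/**
 * Example (illustrative sketch only) for cloneVoice above: clone a voice from
 * a local sample and wait for the rendered audio. Assumes `client` is the
 * SDK's configured API client (constructed elsewhere; not shown in this
 * excerpt), and the file path is a placeholder.
 *
 *   const voice = new VoiceService(client);
 *   const { job, outputUrl, duration } = await voice.cloneVoice({
 *     voiceFile: './sample.wav',
 *     text: 'Hello from AudioPod!',
 *     language: 'en',
 *     waitForCompletion: true
 *   });
 *   console.log(job.status, outputUrl, duration);
 */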
/**
* Create a reusable voice profile
*/
async createVoiceProfile(name, voiceFile, description, isPublic = false, waitForCompletion = false, timeout = 600000) {
if (!name || name.trim().length === 0) {
throw new Error('Voice profile name cannot be empty');
}
if (name.length > 100) {
throw new Error('Voice profile name too long (max 100 characters)');
}
let fileData;
if (typeof voiceFile === 'string') {
const fs = await import('fs');
const path = await import('path');
if (!fs.existsSync(voiceFile)) {
throw new Error(`Voice file not found: ${voiceFile}`);
}
const fileBuffer = fs.readFileSync(voiceFile);
const fileName = path.basename(voiceFile);
fileData = new Blob([fileBuffer], { type: 'audio/*' });
fileData.name = fileName;
}
else {
fileData = voiceFile;
}
const formData = {
name: name.trim(),
is_public: isPublic
};
if (description) {
formData.description = description.trim();
}
const response = await this.client.uploadFile('/voice/voice-profiles', fileData, formData);
if (waitForCompletion) {
// Poll for voice profile completion
const startTime = Date.now();
while (Date.now() - startTime < timeout) {
const voiceProfile = await this.getVoiceProfile(response.id);
if (voiceProfile.status === 'completed') {
return voiceProfile;
}
else if (voiceProfile.status === 'failed') {
throw new Error(`Voice profile creation failed: ${voiceProfile.status}`);
}
await new Promise(resolve => setTimeout(resolve, 5000));
}
throw new Error('Voice profile creation timed out');
}
return response;
}
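/**
 * Example (sketch) for createVoiceProfile above: create a reusable profile
 * from a local file and poll until processing finishes. `voice` is the
 * VoiceService instance from the previous sketch; the name, file path, and
 * description are illustrative.
 *
 *   const profile = await voice.createVoiceProfile(
 *     'narrator',
 *     './narrator-sample.wav',
 *     'House narrator voice',
 *     false, // not public
 *     true   // wait until processing completes
 *   );
 *   console.log(profile.id, profile.status);
 */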
/**
* Generate speech using an existing voice profile
*/
async generateSpeech(voiceId, text, options = {}) {
const { language, speed = 1.0, audioFormat = 'mp3', waitForCompletion = false, timeout = 300000 } = options;
if (!text || text.trim().length === 0) {
throw new Error('Text input cannot be empty');
}
if (speed < 0.5 || speed > 2.0) {
throw new Error('Speed must be between 0.5 and 2.0');
}
const data = {
input_text: text.trim(),
speed,
audio_format: audioFormat
};
if (language) {
data.language = language.toLowerCase();
}
const response = await this.client.post(`/voice/voices/${voiceId}/generate`, data);
if (response.job_id) {
// It's an async job
const job = { id: response.job_id, ...response };
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
duration: result.duration
};
}
return { job };
}
else {
// Direct response with audio URL
return {
job: { id: 0, status: 'completed' },
outputUrl: response.output_url || response.outputUrl,
duration: response.duration
};
}
}
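/**
 * Example (sketch) for generateSpeech above: synthesize speech with an
 * existing voice profile. `profile.id` refers to a previously created
 * profile; all values are illustrative.
 *
 *   const { outputUrl, duration } = await voice.generateSpeech(
 *     profile.id,
 *     'Welcome back to the show.',
 *     {
 *       language: 'en',
 *       speed: 1.1,
 *       audioFormat: 'mp3',
 *       waitForCompletion: true
 *     }
 *   );
 *   console.log(outputUrl, duration);
 */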
/**
* List available voice profiles
*/
async listVoiceProfiles(options = {}) {
const { voiceType, isPublic, includePublic = true, limit = 50, skip = 0 } = options;
const params = {
limit,
skip,
include_public: includePublic
};
if (voiceType) {
params.voice_type = voiceType;
}
if (isPublic !== undefined) {
params.is_public = isPublic;
}
return this.client.get('/voice/voice-profiles', params);
}
/**
* Get details of a specific voice profile
*/
async getVoiceProfile(voiceId) {
return this.client.get(`/voice/voice-profiles/${voiceId}`);
}
/**
* Delete a voice profile
*/
async deleteVoiceProfile(voiceId) {
await this.client.delete(`/voice/voices/${voiceId}`);
}
/**
* Get voice cloning job status
*/
async getJobStatus(jobId) {
return this.client.get(`/voice/clone/${jobId}/status`);
}
/**
* List voice cloning jobs
*/
async listCloningJobs(options = {}) {
const params = {};
if (options.skip !== undefined) {
params.skip = options.skip;
}
if (options.limit !== undefined) {
params.limit = options.limit;
}
if (options.status) {
params.status = options.status;
}
return this.client.get('/voice/clone/jobs', params);
}
/**
* Stream voice generation (WebSocket connection)
*/
async streamVoiceGeneration(voiceId, text, options = {}) {
const { language, speed = 1.0, onProgress, onAudioChunk, onComplete, onError } = options;
try {
const WebSocket = (await import('ws')).default;
const config = this.client.getConfig();
const wsUrl = `${config.baseURL.replace('http', 'ws')}/api/v1/voice/ws/stream`;
const ws = new WebSocket(wsUrl, {
headers: {
'Authorization': `Bearer ${config.apiKey}`
}
});
ws.on('open', () => {
const message = {
type: 'voice_generation',
data: {
voice_id: voiceId,
input_text: text,
target_language: language,
speed
}
};
ws.send(JSON.stringify(message));
});
ws.on('message', (data) => {
try {
const message = JSON.parse(data.toString());
switch (message.type) {
case 'progress':
if (onProgress) {
onProgress(message.data.progress);
}
break;
case 'audio_chunk':
if (onAudioChunk) {
const audioData = Buffer.from(message.data.chunk, 'base64');
// Slice to this chunk's own bytes: a Node Buffer may share a larger pooled ArrayBuffer.
onAudioChunk(audioData.buffer.slice(audioData.byteOffset, audioData.byteOffset + audioData.byteLength));
}
break;
case 'complete':
if (onComplete) {
onComplete(message.data);
}
ws.close();
break;
case 'error':
if (onError) {
onError(new Error(message.data.message));
}
ws.close();
break;
}
}
catch (error) {
if (onError) {
onError(error);
}
}
});
ws.on('error', (error) => {
if (onError) {
onError(error);
}
});
}
catch (error) {
if (onError) {
onError(error);
}
}
}
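/**
 * Example (sketch) for streamVoiceGeneration above: stream generation with
 * callbacks, appending audio chunks to a local file. Node-only (uses `fs` and
 * the `ws` package, as the method does); identifiers and the output path are
 * illustrative.
 *
 *   import { createWriteStream } from 'fs';
 *   const out = createWriteStream('./stream.raw');
 *   await voice.streamVoiceGeneration(profile.id, 'Streaming hello', {
 *     onProgress: (p) => console.log(`progress: ${p}%`),
 *     onAudioChunk: (chunk) => out.write(Buffer.from(chunk)),
 *     onComplete: () => out.end(),
 *     onError: (err) => console.error(err)
 *   });
 */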
/**
* Convert source audio to match target voice characteristics
*/
async convertVoice(request) {
const { sourceFile, url, voiceUuid, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!sourceFile && !url) {
throw new Error('Either sourceFile or url must be provided');
}
if (sourceFile && url) {
throw new Error('Cannot provide both sourceFile and url');
}
if (!voiceUuid || voiceUuid.trim().length === 0) {
throw new Error('Voice UUID cannot be empty');
}
let job;
if (url) {
// URL-based conversion
const formData = {
url,
voice_uuid: voiceUuid.trim()
};
job = await this.client.post('/voice/voice-convert', formData);
}
else {
// File-based conversion
let fileData;
if (typeof sourceFile === 'string') {
const fs = await import('fs');
const path = await import('path');
if (!fs.existsSync(sourceFile)) {
throw new Error(`Source audio file not found: ${sourceFile}`);
}
const fileBuffer = fs.readFileSync(sourceFile);
const fileName = path.basename(sourceFile);
fileData = new Blob([fileBuffer], { type: 'audio/*' });
fileData.name = fileName;
}
else {
fileData = sourceFile;
}
const formData = {
voice_uuid: voiceUuid.trim()
};
job = await this.client.uploadFile('/voice/voice-convert', fileData, formData);
}
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_path || result.outputUrl || result.output_url
};
}
return {
job,
outputUrl: undefined
};
}
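/**
 * Example (sketch) for convertVoice above: convert an existing recording to a
 * target voice. The voice UUID and file path are placeholders; `voice` is the
 * VoiceService instance from the earlier sketches.
 *
 *   const { outputUrl } = await voice.convertVoice({
 *     sourceFile: './interview.wav',
 *     voiceUuid: 'target-voice-uuid',
 *     waitForCompletion: true
 *   });
 *   console.log(outputUrl);
 */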
/**
* Generate multi-voice TTS with multiple speakers
*/
async generateMultiVoiceTTS(request) {
const { segments, mixMode = 'sequential', outputFormat = 'wav', silenceDuration = 0.5, normalizeVolume = true, generationParams = {}, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!segments || segments.length === 0) {
throw new Error('At least one segment must be provided');
}
if (segments.length > 50) {
throw new Error('Maximum 50 segments allowed');
}
for (const segment of segments) {
if (!segment.text || segment.text.trim().length === 0) {
throw new Error('Segment text cannot be empty');
}
if (!segment.voiceId) {
throw new Error('Voice ID must be specified for each segment');
}
}
if (!['sequential', 'parallel', 'timed'].includes(mixMode)) {
throw new Error('Mix mode must be sequential, parallel, or timed');
}
if (!['wav', 'mp3', 'ogg'].includes(outputFormat)) {
throw new Error('Output format must be wav, mp3, or ogg');
}
const formData = {
segments: JSON.stringify(segments),
mix_mode: mixMode,
output_format: outputFormat,
silence_duration: silenceDuration,
normalize_volume: normalizeVolume,
generation_params: JSON.stringify(generationParams)
};
const job = await this.client.post('/voice/multi-voice-tts', formData);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_path || result.outputUrl || result.output_url,
totalDuration: result.total_duration || result.totalDuration,
segmentsCount: result.segments_count || result.segmentsCount || segments.length,
uniqueVoicesCount: result.unique_voices_count || result.uniqueVoicesCount
};
}
return {
job,
outputUrl: undefined,
totalDuration: undefined,
segmentsCount: segments.length,
uniqueVoicesCount: new Set(segments.map(s => s.voiceId)).size
};
}
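/**
 * Example (sketch) for generateMultiVoiceTTS above: render a two-speaker
 * dialogue sequentially. The voice IDs are illustrative placeholders.
 *
 *   const result = await voice.generateMultiVoiceTTS({
 *     segments: [
 *       { voiceId: 101, text: 'Welcome to the podcast.' },
 *       { voiceId: 202, text: 'Thanks for having me!' }
 *     ],
 *     mixMode: 'sequential',
 *     outputFormat: 'mp3',
 *     silenceDuration: 0.4,
 *     waitForCompletion: true
 *   });
 *   console.log(result.outputUrl, result.totalDuration);
 */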
/**
* Create a voice collection
*/
async createVoiceCollection(request) {
const { name, description, isPublic = false, color, icon, voiceIds = [] } = request;
// Validate inputs
if (!name || name.trim().length === 0) {
throw new Error('Collection name cannot be empty');
}
if (name.length > 100) {
throw new Error('Collection name cannot exceed 100 characters');
}
if (description && description.length > 500) {
throw new Error('Description cannot exceed 500 characters');
}
if (color && !/^#[0-9A-Fa-f]{6}$/.test(color)) {
throw new Error('Color must be a valid hex color code (e.g., #FF0000)');
}
if (icon && icon.length > 50) {
throw new Error('Icon identifier cannot exceed 50 characters');
}
const data = {
name: name.trim(),
is_public: isPublic,
voice_ids: voiceIds
};
if (description) {
data.description = description.trim();
}
if (color) {
data.color = color;
}
if (icon) {
data.icon = icon;
}
const result = await this.client.post('/voice/collections', data);
return result;
}
/**
* List voice collections
*/
async listVoiceCollections(options = {}) {
const { includeVoices = false, includePublic = true } = options;
const params = {
include_voices: includeVoices,
include_public: includePublic
};
return this.client.get('/voice/collections', params);
}
/**
* Update a voice collection
*/
async updateVoiceCollection(collectionId, updates) {
const data = {};
if (updates.name !== undefined) {
if (!updates.name || updates.name.trim().length === 0) {
throw new Error('Collection name cannot be empty');
}
data.name = updates.name.trim();
}
if (updates.description !== undefined) {
data.description = updates.description?.trim();
}
if (updates.isPublic !== undefined) {
data.is_public = updates.isPublic;
}
if (updates.color !== undefined) {
if (updates.color && !/^#[0-9A-Fa-f]{6}$/.test(updates.color)) {
throw new Error('Color must be a valid hex color code');
}
data.color = updates.color;
}
if (updates.icon !== undefined) {
data.icon = updates.icon;
}
const result = await this.client.put(`/voice/collections/${collectionId}`, data);
return result;
}
/**
* Delete a voice collection
*/
async deleteVoiceCollection(collectionId) {
await this.client.delete(`/voice/collections/${collectionId}`);
}
/**
* Add voices to a collection
*/
async addVoicesToCollection(collectionId, voiceIds) {
if (!voiceIds || voiceIds.length === 0) {
throw new Error('At least one voice ID must be provided');
}
const data = {
voice_ids: voiceIds
};
await this.client.post(`/voice/collections/${collectionId}/voices`, data);
}
/**
* Remove voices from a collection
*/
async removeVoicesFromCollection(collectionId, voiceIds) {
if (!voiceIds || voiceIds.length === 0) {
throw new Error('At least one voice ID must be provided');
}
// Use post method to remove voices from collection
await this.client.post(`/voice/collections/${collectionId}/voices/remove`, {
voice_ids: voiceIds
});
}
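/**
 * Example (sketch) for the collection methods above: create a collection and
 * attach voices to it. Voice IDs and styling values are illustrative, and the
 * use of `collection.id` assumes the create response includes the new
 * collection's id.
 *
 *   const collection = await voice.createVoiceCollection({
 *     name: 'Podcast voices',
 *     description: 'Hosts and recurring guests',
 *     color: '#3366FF'
 *   });
 *   await voice.addVoicesToCollection(collection.id, [101, 202]);
 */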
/**
* Generate public voice preview (no authentication required)
*/
async generatePublicPreview(request) {
const { voiceIdentifier, inputText, targetLanguage = 'en' } = request;
// Validate inputs
if (!voiceIdentifier || voiceIdentifier.trim().length === 0) {
throw new Error('Voice identifier cannot be empty');
}
if (!inputText || inputText.trim().length === 0) {
throw new Error('Input text cannot be empty');
}
if (inputText.length > 2000) {
throw new Error('Input text cannot exceed 2000 characters');
}
const formData = {
voice_identifier: voiceIdentifier.trim(),
input_text: inputText.trim(),
target_language: targetLanguage
};
const result = await this.client.post('/voice/public/preview-generate', formData);
return {
audioUrl: result.audio_url || result.audioUrl,
duration: result.duration
};
}
/**
* Populate provider voices (admin only)
*/
async populateProviderVoices(provider) {
if (!['openai', 'google_gemini', 'audiopod_sonic'].includes(provider)) {
throw new Error('Invalid provider. Must be openai, google_gemini, or audiopod_sonic');
}
const result = await this.client.post(`/voice/voices/populate/${provider}`);
return {
message: result.message,
count: result.voices_created || result.count || 0
};
}
/**
* Get unified TTS job status (works for both single and multi-voice TTS)
*/
async getUnifiedJobStatus(jobId) {
return this.client.get(`/voice/tts-jobs/${jobId}/status`);
}
/**
* Get unified TTS history (both single and multi-voice jobs)
*/
async getUnifiedTTSHistory(options = {}) {
const { limit = 50, offset = 0, status, jobType } = options;
const params = {
limit,
offset
};
if (status) {
params.status = status;
}
if (jobType) {
params.job_type = jobType;
}
return this.client.get('/voice/tts-jobs/history', params);
}
}
/**
* Music Service
* Handles music generation operations
*/
class MusicService {
constructor(client) {
this.client = client;
}
/**
* Generate music from text prompt
*/
async generateMusic(request) {
const { prompt, duration = 120.0, guidanceScale = 7.5, numInferenceSteps = 50, seed, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Music prompt cannot be empty');
}
if (duration !== -1 && (duration < 1.0 || duration > 600.0)) {
throw new Error('Duration must be -1 (auto) or between 1 and 600 seconds');
}
if (guidanceScale < 1.0 || guidanceScale > 30.0) {
throw new Error('Guidance scale must be between 1.0 and 30.0');
}
if (numInferenceSteps < 1 || numInferenceSteps > 200) {
throw new Error('Inference steps must be between 1 and 200');
}
if (seed !== undefined && (seed < 0 || seed > Math.pow(2, 32) - 1)) {
throw new Error('Seed must be between 0 and 2^32 - 1');
}
const data = {
prompt: prompt.trim(),
audio_duration: duration,
guidance_scale: guidanceScale,
infer_step: numInferenceSteps,
lyrics: '',
format: 'wav',
scheduler_type: 'euler',
cfg_type: 'apg',
omega_scale: 10,
guidance_interval: 0.5,
guidance_interval_decay: 0.0,
min_guidance_scale: 3.0,
guidance_scale_text: 0.0,
guidance_scale_lyric: 0.0,
use_erg_tag: true,
use_erg_lyric: true,
use_erg_diffusion: true,
lora_name_or_path: 'none',
lora_weight: 1.0,
torch_compile: false,
cpu_offload: false,
overlapped_decode: false,
debug: false
};
if (seed !== undefined) {
data.manual_seeds = [seed];
}
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/text2music', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return { job };
}
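/**
 * Example (sketch) for generateMusic above: generate a track from a text
 * prompt and wait for the result. Assumes `client` is the configured API
 * client, as in the VoiceService sketches; prompt and seed are illustrative.
 *
 *   const music = new MusicService(client);
 *   const { outputUrl, audioDuration, actualSeeds } = await music.generateMusic({
 *     prompt: 'Dreamy lo-fi hip hop with warm vinyl crackle',
 *     duration: 90,
 *     seed: 12345,
 *     waitForCompletion: true
 *   });
 *   console.log(outputUrl, audioDuration, actualSeeds);
 */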
/**
* Generate rap music from prompt and lyrics
*/
async generateRap(request) {
const { prompt, lyrics, duration = 120.0, style, tempo, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Rap prompt cannot be empty');
}
if (!lyrics || lyrics.trim().length === 0) {
throw new Error('Rap lyrics cannot be empty');
}
if (duration !== -1 && (duration < 1.0 || duration > 600.0)) {
throw new Error('Duration must be -1 (auto) or between 1 and 600 seconds');
}
const data = {
prompt: prompt.trim(),
lyrics: lyrics.trim(),
audio_duration: duration,
format: 'wav',
// Use rap-specific LoRA by default
lora_name_or_path: 'ACE-Step/ACE-Step-v1-chinese-rap-LoRA',
lora_weight: 1.0
};
if (style) {
data.style = style;
}
if (tempo) {
data.tempo = tempo;
}
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/text2rap', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Generate instrumental music from prompt
*/
async generateInstrumental(request) {
const { prompt, duration = 120.0, instruments, key, tempo, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Instrumental prompt cannot be empty');
}
if (duration !== -1 && (duration < 1.0 || duration > 600.0)) {
throw new Error('Duration must be -1 (auto) or between 1 and 600 seconds');
}
const data = {
prompt: prompt.trim(),
audio_duration: duration,
format: 'wav',
lyrics: '' // Empty for instrumental
};
if (instruments) {
data.instruments = instruments;
}
if (key) {
data.key = key;
}
if (tempo) {
data.tempo = tempo;
}
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/prompt2instrumental', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Generate vocals from prompt and lyrics
*/
async generateVocals(request) {
const { prompt, lyrics, duration = 120.0, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Vocals prompt cannot be empty');
}
if (!lyrics || lyrics.trim().length === 0) {
throw new Error('Vocals lyrics cannot be empty');
}
if (duration !== -1 && (duration < 1.0 || duration > 600.0)) {
throw new Error('Duration must be -1 (auto) or between 1 and 600 seconds');
}
const data = {
prompt: prompt.trim(),
lyrics: lyrics.trim(),
audio_duration: duration,
format: 'wav'
};
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/lyric2vocals', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Generate audio samples from prompt
*/
async generateSamples(request) {
const { prompt, sampleType = 'loop', tempo = 128, duration = 8.0, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Samples prompt cannot be empty');
}
if (duration !== -1 && (duration < 1.0 || duration > 120.0)) {
throw new Error('Duration must be -1 (auto) or between 1 and 120 seconds for samples');
}
const data = {
prompt: prompt.trim(),
sample_type: sampleType,
tempo: tempo,
audio_duration: duration,
format: 'wav'
};
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/text2samples', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Transform audio to audio with new characteristics
*/
async generateAudio2Audio(request) {
const { sourceFile, prompt, refAudioStrength = 0.7, audioDuration = 120.0, inferStep = 60, guidanceScale = 15.0, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Audio2audio prompt cannot be empty');
}
let fileData;
if (typeof sourceFile === 'string') {
const fs = await import('fs');
const path = await import('path');
if (!fs.existsSync(sourceFile)) {
throw new Error(`Source audio file not found: ${sourceFile}`);
}
const fileBuffer = fs.readFileSync(sourceFile);
const fileName = path.basename(sourceFile);
fileData = new Blob([fileBuffer], { type: 'audio/*' });
fileData.name = fileName;
}
else {
fileData = sourceFile;
}
const formData = {
prompt: prompt.trim(),
ref_audio_strength: refAudioStrength,
audio_duration: audioDuration,
infer_step: inferStep,
guidance_scale: guidanceScale
};
if (displayName) {
formData.display_name = displayName.trim();
}
const job = await this.client.uploadFile('/music/audio2audio', fileData, formData);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Generate music using SongBloom with reference audio
*/
async generateSongBloom(request) {
const { lyrics, referenceFile, duration = 120.0, guidanceScale = 7.5, numInferenceSteps = 50, seed, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!lyrics || lyrics.trim().length === 0) {
throw new Error('SongBloom lyrics cannot be empty');
}
if (duration !== -1 && (duration < 10.0 || duration > 600.0)) {
throw new Error('Duration must be -1 (auto) or between 10 and 600 seconds for SongBloom');
}
let fileData;
if (typeof referenceFile === 'string') {
const fs = await import('fs');
const path = await import('path');
if (!fs.existsSync(referenceFile)) {
throw new Error(`Reference audio file not found: ${referenceFile}`);
}
const fileBuffer = fs.readFileSync(referenceFile);
const fileName = path.basename(referenceFile);
fileData = new Blob([fileBuffer], { type: 'audio/*' });
fileData.name = fileName;
}
else {
fileData = referenceFile;
}
const formData = {
lyrics: lyrics.trim(),
duration: duration,
guidance_scale: guidanceScale,
num_inference_steps: numInferenceSteps
};
if (seed !== undefined) {
formData.seed = seed;
}
if (displayName) {
formData.display_name = displayName.trim();
}
const job = await this.client.uploadFile('/music/songbloom', fileData, formData);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Create a retake/variation of existing music
*/
async retakeMusic(request) {
const { originalJobId, retakeVariance = 0.5, retakeSeeds, displayName, waitForCompletion = false, timeout = 600000 } = request;
const data = {
original_job_id: originalJobId,
retake_variance: retakeVariance
};
if (retakeSeeds) {
data.retake_seeds = retakeSeeds;
}
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/retake', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Repaint sections of existing music
*/
async repaintMusic(request) {
const { sourceJobId, repaintStart, repaintEnd, prompt, lyrics = '', displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Repaint prompt cannot be empty');
}
if (repaintEnd <= repaintStart) {
throw new Error('Repaint end time must be greater than start time');
}
const data = {
source_job_id: sourceJobId,
repaint_start: repaintStart,
repaint_end: repaintEnd,
prompt: prompt.trim(),
lyrics: lyrics.trim()
};
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/repaint', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Extend existing music with additional content
*/
async extendMusic(request) {
const { sourceJobId, leftExtendLength = 0.0, rightExtendLength = 30.0, prompt, lyrics = '', extendSeeds, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!prompt || prompt.trim().length === 0) {
throw new Error('Extend prompt cannot be empty');
}
if (leftExtendLength < 0 || leftExtendLength > 60) {
throw new Error('Left extend length must be between 0 and 60 seconds');
}
if (rightExtendLength < 0 || rightExtendLength > 60) {
throw new Error('Right extend length must be between 0 and 60 seconds');
}
const data = {
source_job_id: sourceJobId,
left_extend_length: leftExtendLength,
right_extend_length: rightExtendLength,
prompt: prompt.trim(),
lyrics: lyrics.trim()
};
if (extendSeeds) {
data.extend_seeds = extendSeeds;
}
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/extend', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
/**
* Edit existing music with new style or lyrics
*/
async editMusic(request) {
const { sourceJobId, editTargetPrompt, editTargetLyrics = '', editType = 'remix', editNMin = 0.0, editNMax = 1.0, editNAvg = 1, displayName, waitForCompletion = false, timeout = 600000 } = request;
// Validate inputs
if (!editTargetPrompt || editTargetPrompt.trim().length === 0) {
throw new Error('Edit target prompt cannot be empty');
}
if (!['remix', 'only_lyrics'].includes(editType)) {
throw new Error('Edit type must be "remix" or "only_lyrics"');
}
const data = {
source_job_id: sourceJobId,
edit_target_prompt: editTargetPrompt.trim(),
edit_target_lyrics: editTargetLyrics.trim(),
edit_type: editType,
edit_n_min: editNMin,
edit_n_max: editNMax,
edit_n_avg: editNAvg
};
if (displayName) {
data.display_name = displayName.trim();
}
const job = await this.client.post('/music/edit', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
outputUrl: result.output_url || result.outputUrl,
outputUrls: result.output_urls || result.outputUrls,
audioDuration: result.audio_duration || result.audioDuration,
actualSeeds: result.actual_seeds || result.actualSeeds,
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl,
isShared: result.is_shared || result.isShared || false
};
}
return {
job,
outputUrl: undefined,
outputUrls: undefined,
audioDuration: undefined,
actualSeeds: undefined,
shareToken: undefined,
shareUrl: undefined,
isShared: false
};
}
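/**
 * Example (sketch) covering the iteration methods above (retakeMusic,
 * extendMusic): produce a close variation of a completed job, then extend it.
 * `baseJob.id` refers to an existing completed music job; all values are
 * illustrative.
 *
 *   // Produce a close variation of the original
 *   const retake = await music.retakeMusic({
 *     originalJobId: baseJob.id,
 *     retakeVariance: 0.3,
 *     waitForCompletion: true
 *   });
 *
 *   // Then extend the variation by 30 seconds on the right
 *   const extended = await music.extendMusic({
 *     sourceJobId: retake.job.id,
 *     rightExtendLength: 30,
 *     prompt: 'Keep the same mood, add a soft outro',
 *     waitForCompletion: true
 *   });
 *   console.log(extended.outputUrl);
 */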
/**
* Get music generation job status
*/
async getJobStatus(jobId) {
return this.client.get(`/music/jobs/${jobId}/status`);
}
/**
* List music generation jobs
*/
async listJobs(options = {}) {
const { limit = 50, offset = 0, status } = options;
const params = {
limit,
offset
};
if (status) {
params.status = status;
}
return this.client.get('/music/jobs', params);
}
/**
* Like a music track
*/
async likeMusicTrack(jobId) {
const result = await this.client.post(`/music/jobs/${jobId}/like`);
return {
liked: result.liked,
totalLikes: result.total_likes || result.totalLikes
};
}
/**
* Unlike a music track
*/
async unlikeMusicTrack(jobId) {
const result = await this.client.delete(`/music/jobs/${jobId}/like`);
return {
liked: result.liked,
totalLikes: result.total_likes || result.totalLikes
};
}
/**
* Share a music track
*/
async shareMusicTrack(jobId, options = {}) {
const result = await this.client.post(`/music/jobs/${jobId}/share`, options);
return {
shareToken: result.share_token || result.shareToken,
shareUrl: result.share_url || result.shareUrl
};
}
/**
* Get music track statistics
*/
async getMusicTrackStats(jobId) {
return this.client.get(`/music/jobs/${jobId}/stats`);
}
/**
* Delete a music generation job
*/
async deleteMusicJob(jobId) {
await this.client.delete(`/music/jobs/${jobId}`);
}
/**
* Get shared music track (public access)
*/
async getSharedTrack(shareToken) {
const job = await this.client.get(`/music/share/${shareToken}`);
return {
job,
outputUrl: job.output_url || job.outputUrl,
outputUrls: job.output_urls || job.outputUrls,
audioDuration: job.audio_duration || job.audioDuration,
isShared: true,
shareToken,
shareUrl: job.share_url || job.shareUrl
};
}
}
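/**
 * Example (sketch) for the sharing methods above: share a finished track and
 * read it back via its public token. `music` is the MusicService instance
 * from the earlier sketch and `jobId` is an illustrative completed job id.
 *
 *   const { shareToken, shareUrl } = await music.shareMusicTrack(jobId);
 *   const shared = await music.getSharedTrack(shareToken);
 *   console.log(shareUrl, shared.outputUrl, shared.audioDuration);
 */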
/**
* Transcription Service
* Handles audio transcription operations
*/
class TranscriptionService {
constructor(client) {
this.client = client;
}
/**
* Transcribe audio to text
*/
async transcribeAudio(request) {
const { audioFile, language, modelType = 'whisperx', enableSpeakerDiarization = false, enableWordTimestamps = true, waitForCompletion = false, timeout = 600000 } = request;
let fileData;
if (typeof audioFile === 'string') {
const fs = await import('fs');
const path = await import('path');
if (!fs.existsSync(audioFile)) {
throw new Error(`Audio file not found: ${audioFile}`);
}
const fileBuffer = fs.readFileSync(audioFile);
const fileName = path.basename(audioFile);
fileData = new Blob([fileBuffer], { type: 'audio/*' });
fileData.name = fileName;
}
else {
fileData = audioFile;
}
const formData = {
model_type: modelType,
enable_speaker_diarization: enableSpeakerDiarization,
enable_word_timestamps: enableWordTimestamps
};
if (language) {
formData.language = language.toLowerCase();
}
const job = await this.client.uploadFile('/transcription/transcribe-upload', fileData, formData);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
transcript: result.transcript,
detectedLanguage: result.detected_language || result.detectedLanguage,
confidenceScore: result.confidence_score || result.confidenceScore,
segments: result.segments,
audioDuration: result.total_duration || result.audioDuration
};
}
return { job };
}
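/**
 * Example (sketch) for transcribeAudio above: transcribe a local recording
 * with speaker diarization enabled. Assumes `client` is the configured API
 * client, as in the earlier sketches; the file path is a placeholder.
 *
 *   const transcription = new TranscriptionService(client);
 *   const { transcript, detectedLanguage, segments } = await transcription.transcribeAudio({
 *     audioFile: './meeting.mp3',
 *     enableSpeakerDiarization: true,
 *     waitForCompletion: true
 *   });
 *   console.log(detectedLanguage, transcript, segments?.length);
 */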
/**
* Transcribe from URL
*/
async transcribeUrl(url, options = {}) {
const { language, modelType = 'whisperx', enableSpeakerDiarization = false, waitForCompletion = false, timeout = 600000 } = options;
const data = {
source_urls: [url],
model_type: modelType,
enable_speaker_diarization: enableSpeakerDiarization
};
if (language) {
data.language = language.toLowerCase();
}
const job = await this.client.post('/transcription/transcribe', data);
if (waitForCompletion) {
const result = await this.client.waitForJobCompletion(job.id, timeout);
return {
job: { ...job, ...result },
transcript: result.transcript,
detectedLanguage: