audiopod-sdk
AudioPod SDK for Node.js and React - Professional Audio Processing powered by AI
TypeScript
/**
* AudioPod SDK Types
* Type definitions for the AudioPod API
*/
interface AudioPodConfig {
apiKey: string;
baseURL?: string;
timeout?: number;
maxRetries?: number;
debug?: boolean;
}
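/*
 * Usage sketch (illustrative, not part of the generated declarations):
 * constructing a client from an AudioPodConfig. The environment variable
 * name and the timeout/retry values are assumptions made for the example.
 */
import { AudioPodClient } from 'audiopod-sdk';

const client = new AudioPodClient({
  apiKey: process.env.AUDIOPOD_API_KEY ?? '',
  timeout: 120_000,   // per-request timeout, assumed to be milliseconds
  maxRetries: 3,
  debug: true,
});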
interface ApiResponse<T = any> {
data?: T;
error?: string;
message?: string;
status?: string;
}
interface Job {
id: number;
status: JobStatus;
progress: number;
createdAt: string;
updatedAt?: string;
completedAt?: string;
errorMessage?: string;
parameters?: Record<string, any>;
result?: Record<string, any>;
}
declare enum JobStatus {
PENDING = "pending",
PROCESSING = "processing",
COMPLETED = "completed",
FAILED = "failed",
CANCELLED = "cancelled"
}
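/*
 * Usage sketch (illustrative): polling a Job until it reaches a terminal
 * JobStatus. Assumes `client` is an AudioPodClient as constructed above;
 * the 5-second interval is an arbitrary choice for the example.
 */
import { AudioPodClient, JobStatus } from 'audiopod-sdk';
import type { Job } from 'audiopod-sdk';

async function pollJob(client: AudioPodClient, jobId: number): Promise<Job> {
  const terminal = new Set([JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED]);
  let job = await client.getJobStatus(jobId);
  while (!terminal.has(job.status)) {
    console.log(`job ${job.id}: ${job.status} (${job.progress}%)`);
    await new Promise((resolve) => setTimeout(resolve, 5_000));
    job = await client.getJobStatus(jobId);
  }
  return job;
}
// In practice, AudioPodClient.waitForJobCompletion (declared below) wraps this pattern.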
interface VoiceProfile {
id: number;
uuid: string;
name: string;
displayName?: string;
description?: string;
voiceType: VoiceType;
provider: TTSProvider;
isPublic: boolean;
languageCode?: string;
languageName?: string;
gender?: string;
accent?: string;
createdAt?: string;
status?: JobStatus;
}
declare enum VoiceType {
CUSTOM = "custom",
STANDARD = "standard"
}
declare enum TTSProvider {
AUDIOPOD_SONIC = "audiopod_sonic",
OPENAI = "openai",
GOOGLE_GEMINI = "google_gemini"
}
interface VoiceCloneRequest {
voiceFile: File | Blob | string;
text: string;
language?: string;
speed?: number;
waitForCompletion?: boolean;
timeout?: number;
}
interface VoiceCloneResult {
job: Job;
outputUrl?: string;
duration?: number;
}
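/*
 * Usage sketch (illustrative): a one-shot voice clone via VoiceService.cloneVoice.
 * Assumes a string voiceFile is accepted as a local path or URL; the path and
 * text below are placeholders.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function cloneVoiceExample(client: AudioPodClient): Promise<void> {
  const { job, outputUrl, duration } = await client.voice.cloneVoice({
    voiceFile: './reference-voice.wav',   // hypothetical local file
    text: 'Hello! This is my cloned voice.',
    language: 'en',
    waitForCompletion: true,              // block until the job resolves
  });
  console.log(`job ${job.id} -> ${job.status}`, outputUrl, duration);
}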
interface MusicGenerationRequest {
prompt: string;
duration?: number;
guidanceScale?: number;
numInferenceSteps?: number;
seed?: number;
displayName?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface MusicGenerationResult {
job: Job;
outputUrl?: string;
outputUrls?: Record<string, string>;
audioDuration?: number;
actualSeeds?: number[];
shareToken?: string;
shareUrl?: string;
isShared?: boolean;
}
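/*
 * Usage sketch (illustrative): text-to-music via MusicService.generateMusic.
 * The prompt, duration, and seed are placeholders; duration is assumed to be
 * in seconds.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function generateMusicExample(client: AudioPodClient): Promise<string | undefined> {
  const result = await client.music.generateMusic({
    prompt: 'warm lo-fi hip hop with vinyl crackle and soft piano',
    duration: 30,
    seed: 42,
    displayName: 'Lo-fi sketch',
    waitForCompletion: true,
  });
  console.log(result.job.status, result.audioDuration, result.actualSeeds);
  return result.outputUrl;
}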
interface TranscriptionRequest {
audioFile: File | Blob | string;
language?: string;
modelType?: 'whisperx' | 'faster-whisper';
enableSpeakerDiarization?: boolean;
enableWordTimestamps?: boolean;
waitForCompletion?: boolean;
timeout?: number;
}
interface TranscriptionResult {
job: Job;
transcript?: string;
detectedLanguage?: string;
confidenceScore?: number;
segments?: TranscriptionSegment[];
audioDuration?: number;
}
interface TranscriptionSegment {
text: string;
start: number;
end: number;
speaker?: string;
confidence?: number;
}
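/*
 * Usage sketch (illustrative): transcribing a file with speaker diarization
 * and printing per-segment speakers. The audio path is a placeholder.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function transcribeExample(client: AudioPodClient): Promise<void> {
  const result = await client.transcription.transcribeAudio({
    audioFile: './interview.mp3',          // hypothetical local file
    modelType: 'whisperx',
    enableSpeakerDiarization: true,
    enableWordTimestamps: true,
    waitForCompletion: true,
  });
  console.log(`language: ${result.detectedLanguage}, confidence: ${result.confidenceScore}`);
  for (const seg of result.segments ?? []) {
    console.log(`[${seg.start.toFixed(1)}-${seg.end.toFixed(1)}] ${seg.speaker ?? 'unknown'}: ${seg.text}`);
  }
}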
interface TranslationRequest {
audioFile: File | Blob | string;
targetLanguage: string;
sourceLanguage?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface TranslationResult {
job: Job;
sourceLanguage?: string;
targetLanguage?: string;
audioOutputUrl?: string;
videoOutputUrl?: string;
transcriptPath?: string;
transcriptUrls?: {
json?: string;
source_audio?: string;
};
}
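/*
 * Usage sketch (illustrative): speech-to-speech translation. The file path is
 * a placeholder, and the expected language-code format (e.g. 'es' vs 'es-ES')
 * is an assumption here.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function translateExample(client: AudioPodClient): Promise<void> {
  const result = await client.translation.translateAudio({
    audioFile: './podcast-episode.mp3',
    sourceLanguage: 'en',
    targetLanguage: 'es',
    waitForCompletion: true,
  });
  console.log('translated audio:', result.audioOutputUrl);
  console.log('transcript JSON:', result.transcriptUrls?.json);
}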
interface CreditInfo {
balance: number;
paygBalance: number;
totalAvailableCredits: number;
nextResetDate?: string;
totalCreditsUsed: number;
}
interface CreditUsageRecord {
id: string;
creditsUsed: number;
serviceType: string;
audioDuration: number;
jobId?: string;
creditSource: string;
createdAt: string;
usageMetadata?: Record<string, any>;
}
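/*
 * Usage sketch (illustrative): checking the credit balance before launching a
 * long job and summarising recent usage by service type.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function creditsExample(client: AudioPodClient): Promise<void> {
  const balance = await client.credits.getCreditBalance();
  console.log(`available: ${balance.totalAvailableCredits} (resets ${balance.nextResetDate ?? 'n/a'})`);

  const usage = await client.credits.getUsageHistory();
  const byService = new Map<string, number>();
  for (const record of usage) {
    byService.set(record.serviceType, (byService.get(record.serviceType) ?? 0) + record.creditsUsed);
  }
  console.log(Object.fromEntries(byService));
}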
interface AudioPodError {
message: string;
code?: string;
statusCode?: number;
details?: Record<string, any>;
}
interface UploadProgress {
loaded: number;
total: number;
percentage: number;
}
interface JobProgress {
jobId: number;
status: JobStatus;
progress: number;
message?: string;
result?: any;
}
interface UseAudioPodOptions {
apiKey?: string;
onProgress?: (progress: JobProgress) => void;
onError?: (error: AudioPodError) => void;
onSuccess?: (result: any) => void;
}
interface UseVoiceCloningOptions extends UseAudioPodOptions {
autoStart?: boolean;
pollInterval?: number;
}
interface UseMusicGenerationOptions extends UseAudioPodOptions {
autoStart?: boolean;
pollInterval?: number;
}
interface UseTranscriptionOptions extends UseAudioPodOptions {
autoStart?: boolean;
pollInterval?: number;
}
interface AudioPodEvent {
type: string;
data: any;
timestamp: string;
}
interface JobStatusEvent extends AudioPodEvent {
type: 'job_status';
data: {
jobId: number;
status: JobStatus;
progress: number;
result?: any;
};
}
interface ProgressEvent extends AudioPodEvent {
type: 'progress';
data: {
jobId: number;
progress: number;
message?: string;
};
}
interface StemExtractionRequest {
audioFile?: File | Blob | string;
url?: string;
stemTypes?: ('vocals' | 'drums' | 'bass' | 'other' | 'piano' | 'guitar')[];
modelName?: 'htdemucs' | 'htdemucs_6s';
twoStemsMode?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface StemExtractionResult {
job: Job;
stemPaths?: Record<string, string>;
downloadUrls?: Record<string, string>;
qualityScores?: Record<string, number>;
}
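/*
 * Usage sketch (illustrative): separating vocals and drums with the 6-stem
 * model. The file path is a placeholder.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function extractStemsExample(client: AudioPodClient): Promise<void> {
  const result = await client.stemExtraction.extractStems({
    audioFile: './full-mix.wav',
    stemTypes: ['vocals', 'drums'],
    modelName: 'htdemucs_6s',
    waitForCompletion: true,
  });
  for (const [stem, url] of Object.entries(result.downloadUrls ?? {})) {
    console.log(`${stem}: ${url}`);
  }
}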
interface RapGenerationRequest {
prompt: string;
lyrics: string;
duration?: number;
style?: 'modern' | 'classic' | 'trap';
tempo?: number;
displayName?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface InstrumentalGenerationRequest {
prompt: string;
duration?: number;
instruments?: string[];
key?: string;
tempo?: number;
displayName?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface VocalsGenerationRequest {
prompt: string;
lyrics: string;
duration?: number;
displayName?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface SamplesGenerationRequest {
prompt: string;
sampleType?: string;
tempo?: number;
duration?: number;
displayName?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface Audio2AudioRequest {
sourceFile: File | Blob | string;
prompt: string;
refAudioStrength?: number;
audioDuration?: number;
inferStep?: number;
guidanceScale?: number;
displayName?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface SongBloomRequest {
lyrics: string;
referenceFile: File | Blob | string;
duration?: number;
guidanceScale?: number;
numInferenceSteps?: number;
seed?: number;
displayName?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface RetakeRequest {
originalJobId: number;
retakeVariance?: number;
retakeSeeds?: number[];
displayName?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface RepaintRequest {
sourceJobId: number;
repaintStart: number;
repaintEnd: number;
prompt: string;
lyrics?: string;
displayName?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface ExtendRequest {
sourceJobId: number;
leftExtendLength?: number;
rightExtendLength?: number;
prompt: string;
lyrics?: string;
extendSeeds?: number[];
displayName?: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface EditRequest {
sourceJobId: number;
editTargetPrompt: string;
editTargetLyrics?: string;
editType?: 'remix' | 'only_lyrics';
editNMin?: number;
editNMax?: number;
editNAvg?: number;
displayName?: string;
waitForCompletion?: boolean;
timeout?: number;
}
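/*
 * Usage sketch (illustrative): revising an existing music job with a retake
 * followed by an extension. The job id and parameter values are placeholders;
 * extend lengths are assumed to be in seconds.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function reviseTrackExample(client: AudioPodClient, jobId: number): Promise<void> {
  // Generate a variation of the original track.
  const retake = await client.music.retakeMusic({
    originalJobId: jobId,
    retakeVariance: 0.4,
    waitForCompletion: true,
  });

  // Append roughly 15 seconds to the end of the retake.
  const extended = await client.music.extendMusic({
    sourceJobId: retake.job.id,
    rightExtendLength: 15,
    prompt: 'fade out with ambient pads',
    waitForCompletion: true,
  });
  console.log(extended.outputUrl);
}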
interface VoiceConversionRequest {
sourceFile?: File | Blob | string;
url?: string;
voiceUuid: string;
waitForCompletion?: boolean;
timeout?: number;
}
interface VoiceConversionResult {
job: Job;
outputUrl?: string;
}
interface MultiVoiceSegment {
text: string;
voiceId: number | string;
startTime?: number;
endTime?: number;
speakerLabel?: string;
}
interface MultiVoiceTTSRequest {
segments: MultiVoiceSegment[];
mixMode?: 'sequential' | 'parallel' | 'timed';
outputFormat?: 'wav' | 'mp3' | 'ogg';
silenceDuration?: number;
normalizeVolume?: boolean;
generationParams?: Record<string, any>;
waitForCompletion?: boolean;
timeout?: number;
}
interface MultiVoiceTTSResult {
job: Job;
outputUrl?: string;
totalDuration?: number;
segmentsCount?: number;
uniqueVoicesCount?: number;
}
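/*
 * Usage sketch (illustrative): rendering a two-speaker dialogue as a single
 * file via generateMultiVoiceTTS. The numeric voice ids are placeholders, and
 * silenceDuration is assumed to be in seconds.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function dialogueExample(client: AudioPodClient): Promise<void> {
  const result = await client.voice.generateMultiVoiceTTS({
    segments: [
      { text: 'Welcome back to the show.', voiceId: 101, speakerLabel: 'Host' },
      { text: 'Thanks for having me!', voiceId: 202, speakerLabel: 'Guest' },
    ],
    mixMode: 'sequential',
    outputFormat: 'mp3',
    silenceDuration: 0.5,
    normalizeVolume: true,
    waitForCompletion: true,
  });
  console.log(result.outputUrl, result.totalDuration, result.uniqueVoicesCount);
}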
interface VoiceCollectionRequest {
name: string;
description?: string;
isPublic?: boolean;
color?: string;
icon?: string;
voiceIds?: number[];
}
interface VoiceCollectionResult {
collection: VoiceCollection;
message: string;
}
interface VoiceCollection {
id: number;
userId: string;
name: string;
description?: string;
isPublic: boolean;
color?: string;
icon?: string;
voiceCount?: number;
voices?: VoiceProfile[];
createdAt: string;
updatedAt: string;
}
interface PublicPreviewRequest {
voiceIdentifier: string;
inputText: string;
targetLanguage?: string;
}
interface PublicPreviewResult {
audioUrl: string;
duration?: number;
}
/**
* Voice Service
* Handles voice cloning and TTS operations
*/
declare class VoiceService {
private client;
constructor(client: AudioPodClient);
/**
* Clone a voice from an audio file
* This method first creates a voice profile, then uses it for cloning
*/
cloneVoice(request: VoiceCloneRequest): Promise<VoiceCloneResult>;
/**
* Create a reusable voice profile
*/
createVoiceProfile(name: string, voiceFile: File | Blob | string, description?: string, isPublic?: boolean, waitForCompletion?: boolean, timeout?: number): Promise<VoiceProfile>;
/**
* Generate speech using an existing voice profile
*/
generateSpeech(voiceId: number | string, text: string, options?: {
language?: string;
speed?: number;
audioFormat?: 'mp3' | 'wav';
waitForCompletion?: boolean;
timeout?: number;
}): Promise<VoiceCloneResult>;
/**
* List available voice profiles
*/
listVoiceProfiles(options?: {
voiceType?: 'custom' | 'standard';
isPublic?: boolean;
includePublic?: boolean;
limit?: number;
skip?: number;
}): Promise<VoiceProfile[]>;
/**
* Get details of a specific voice profile
*/
getVoiceProfile(voiceId: number | string): Promise<VoiceProfile>;
/**
* Delete a voice profile
*/
deleteVoiceProfile(voiceId: number | string): Promise<void>;
/**
* Get voice cloning job status
*/
getJobStatus(jobId: number): Promise<Job>;
/**
* List voice cloning jobs
*/
listCloningJobs(options?: {
skip?: number;
limit?: number;
status?: string;
}): Promise<Job[]>;
/**
* Stream voice generation (WebSocket connection)
*/
streamVoiceGeneration(voiceId: number | string, text: string, options?: {
language?: string;
speed?: number;
onProgress?: (progress: number) => void;
onAudioChunk?: (chunk: ArrayBuffer) => void;
onComplete?: (result: any) => void;
onError?: (error: any) => void;
}): Promise<void>;
/**
* Convert source audio to match target voice characteristics
*/
convertVoice(request: VoiceConversionRequest): Promise<VoiceConversionResult>;
/**
* Generate multi-voice TTS with multiple speakers
*/
generateMultiVoiceTTS(request: MultiVoiceTTSRequest): Promise<MultiVoiceTTSResult>;
/**
* Create a voice collection
*/
createVoiceCollection(request: VoiceCollectionRequest): Promise<VoiceCollectionResult>;
/**
* List voice collections
*/
listVoiceCollections(options?: {
includeVoices?: boolean;
includePublic?: boolean;
}): Promise<VoiceCollection[]>;
/**
* Update a voice collection
*/
updateVoiceCollection(collectionId: number, updates: Partial<VoiceCollectionRequest>): Promise<VoiceCollectionResult>;
/**
* Delete a voice collection
*/
deleteVoiceCollection(collectionId: number): Promise<void>;
/**
* Add voices to a collection
*/
addVoicesToCollection(collectionId: number, voiceIds: number[]): Promise<void>;
/**
* Remove voices from a collection
*/
removeVoicesFromCollection(collectionId: number, voiceIds: number[]): Promise<void>;
/**
* Generate public voice preview (no authentication required)
*/
generatePublicPreview(request: PublicPreviewRequest): Promise<PublicPreviewResult>;
/**
* Populate provider voices (admin only)
*/
populateProviderVoices(provider: 'openai' | 'google_gemini' | 'audiopod_sonic'): Promise<{
message: string;
count: number;
}>;
/**
* Get unified TTS job status (works for both single and multi-voice TTS)
*/
getUnifiedJobStatus(jobId: number): Promise<Job>;
/**
* Get unified TTS history (both single and multi-voice jobs)
*/
getUnifiedTTSHistory(options?: {
limit?: number;
offset?: number;
status?: string;
jobType?: 'single' | 'multi';
}): Promise<Job[]>;
}
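/*
 * Usage sketch (illustrative): creating a reusable voice profile once, then
 * generating speech from it with VoiceService. The file path and text are
 * placeholders.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function voiceProfileExample(client: AudioPodClient): Promise<void> {
  const profile = await client.voice.createVoiceProfile(
    'narrator-en',
    './narrator-sample.wav',     // hypothetical reference recording
    'Warm narration voice',
    false,                       // keep the profile private
    true,                        // wait for processing to finish
  );

  const speech = await client.voice.generateSpeech(
    profile.id,
    'Chapter one. It was a dark and stormy night.',
    { speed: 1.0, audioFormat: 'mp3', waitForCompletion: true },
  );
  console.log(speech.outputUrl);
}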
/**
* Music Service
* Handles music generation operations
*/
declare class MusicService {
private client;
constructor(client: AudioPodClient);
/**
* Generate music from text prompt
*/
generateMusic(request: MusicGenerationRequest): Promise<MusicGenerationResult>;
/**
* Generate rap music from prompt and lyrics
*/
generateRap(request: RapGenerationRequest): Promise<MusicGenerationResult>;
/**
* Generate instrumental music from prompt
*/
generateInstrumental(request: InstrumentalGenerationRequest): Promise<MusicGenerationResult>;
/**
* Generate vocals from prompt and lyrics
*/
generateVocals(request: VocalsGenerationRequest): Promise<MusicGenerationResult>;
/**
* Generate audio samples from prompt
*/
generateSamples(request: SamplesGenerationRequest): Promise<MusicGenerationResult>;
/**
* Transform audio to audio with new characteristics
*/
generateAudio2Audio(request: Audio2AudioRequest): Promise<MusicGenerationResult>;
/**
* Generate music using SongBloom with reference audio
*/
generateSongBloom(request: SongBloomRequest): Promise<MusicGenerationResult>;
/**
* Create a retake/variation of existing music
*/
retakeMusic(request: RetakeRequest): Promise<MusicGenerationResult>;
/**
* Repaint sections of existing music
*/
repaintMusic(request: RepaintRequest): Promise<MusicGenerationResult>;
/**
* Extend existing music with additional content
*/
extendMusic(request: ExtendRequest): Promise<MusicGenerationResult>;
/**
* Edit existing music with new style or lyrics
*/
editMusic(request: EditRequest): Promise<MusicGenerationResult>;
/**
* Get music generation job status
*/
getJobStatus(jobId: number): Promise<Job>;
/**
* List music generation jobs
*/
listJobs(options?: {
limit?: number;
offset?: number;
status?: string;
}): Promise<Job[]>;
/**
* Like a music track
*/
likeMusicTrack(jobId: number): Promise<{
liked: boolean;
totalLikes: number;
}>;
/**
* Unlike a music track
*/
unlikeMusicTrack(jobId: number): Promise<{
liked: boolean;
totalLikes: number;
}>;
/**
* Share a music track
*/
shareMusicTrack(jobId: number, options?: {
title?: string;
description?: string;
expirationDate?: string;
}): Promise<{
shareToken: string;
shareUrl: string;
}>;
/**
* Get music track statistics
*/
getMusicTrackStats(jobId: number): Promise<{
likes: number;
dislikes: number;
shares: number;
comments: number;
}>;
/**
* Delete a music generation job
*/
deleteMusicJob(jobId: number): Promise<void>;
/**
* Get shared music track (public access)
*/
getSharedTrack(shareToken: string): Promise<MusicGenerationResult>;
}
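/*
 * Usage sketch (illustrative): sharing a finished track and reading its stats.
 * The job id and share metadata are placeholders.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function shareTrackExample(client: AudioPodClient, jobId: number): Promise<void> {
  const { shareUrl, shareToken } = await client.music.shareMusicTrack(jobId, {
    title: 'Late-night lo-fi',
    description: 'Generated with AudioPod',
  });
  console.log('public link:', shareUrl);

  await client.music.likeMusicTrack(jobId);
  const stats = await client.music.getMusicTrackStats(jobId);
  console.log(stats.likes, stats.shares);

  // Anyone holding the token can resolve the track without authenticating.
  const shared = await client.music.getSharedTrack(shareToken);
  console.log(shared.outputUrl);
}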
/**
* Transcription Service
* Handles audio transcription operations
*/
declare class TranscriptionService {
private client;
constructor(client: AudioPodClient);
/**
* Transcribe audio to text
*/
transcribeAudio(request: TranscriptionRequest): Promise<TranscriptionResult>;
/**
* Transcribe from URL
*/
transcribeUrl(url: string, options?: {
language?: string;
modelType?: 'whisperx' | 'faster-whisper';
enableSpeakerDiarization?: boolean;
waitForCompletion?: boolean;
timeout?: number;
}): Promise<TranscriptionResult>;
/**
* Get transcription job
*/
getTranscriptionJob(jobId: number): Promise<TranscriptionResult>;
/**
* Download transcript
*/
downloadTranscript(jobId: number, format?: 'json' | 'txt' | 'srt' | 'vtt' | 'pdf' | 'docx' | 'html'): Promise<string>;
}
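/*
 * Usage sketch (illustrative): transcribing a remote file and exporting SRT
 * subtitles. The URL is a placeholder.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function subtitlesExample(client: AudioPodClient): Promise<string> {
  const result = await client.transcription.transcribeUrl(
    'https://example.com/episode-12.mp3',
    { modelType: 'faster-whisper', enableSpeakerDiarization: true, waitForCompletion: true },
  );
  // downloadTranscript resolves to the transcript as a string in the chosen format.
  return client.transcription.downloadTranscript(result.job.id, 'srt');
}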
declare class TranslationService {
private client;
constructor(client: AudioPodClient);
translateAudio(request: TranslationRequest): Promise<TranslationResult>;
getTranslationJob(jobId: number): Promise<TranslationResult>;
listTranslationJobs(options?: {
skip?: number;
limit?: number;
}): Promise<TranslationResult[]>;
deleteTranslationJob(jobId: number): Promise<void>;
}
declare class CreditService {
private client;
constructor(client: AudioPodClient);
getCreditBalance(): Promise<CreditInfo>;
getUsageHistory(): Promise<CreditUsageRecord[]>;
getCreditMultipliers(): Promise<Record<string, number>>;
}
declare class SpeakerService {
private client;
constructor(client: AudioPodClient);
diarizeSpeakers(audioFile: File | Blob | string, numSpeakers?: number, waitForCompletion?: boolean, timeout?: number): Promise<any>;
}
declare class DenoiserService {
private client;
constructor(client: AudioPodClient);
denoiseAudio(audioFile: File | Blob | string, qualityMode?: string, waitForCompletion?: boolean, timeout?: number): Promise<any>;
}
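/*
 * Usage sketch (illustrative): denoising a recording and then running speaker
 * diarization on it. Both methods are typed as Promise<any> here, so the shape
 * of their results is not assumed; the paths and quality mode are placeholders.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function cleanupExample(client: AudioPodClient): Promise<void> {
  const denoised = await client.denoiser.denoiseAudio('./raw-recording.wav', 'high', true);
  console.log('denoise result:', denoised);

  const diarization = await client.speaker.diarizeSpeakers('./raw-recording.wav', 2, true);
  console.log('speakers:', diarization);
}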
/**
* Stem Extraction Service
* Handles audio stem separation operations
*/
declare class StemExtractionService {
private client;
constructor(client: AudioPodClient);
/**
* Extract stems from audio (vocals, drums, bass, other instruments)
*/
extractStems(request: StemExtractionRequest): Promise<StemExtractionResult>;
/**
* Get stem extraction job status
*/
getJobStatus(jobId: number): Promise<StemExtractionResult>;
/**
* List stem extraction jobs
*/
listJobs(options?: {
skip?: number;
limit?: number;
}): Promise<StemExtractionResult[]>;
/**
* Delete stem extraction job
*/
deleteJob(jobId: number): Promise<void>;
}
/**
* AudioPod Client
* Main client class for the AudioPod API
*/
declare class AudioPodClient {
private readonly httpClient;
private readonly config;
readonly voice: VoiceService;
readonly music: MusicService;
readonly transcription: TranscriptionService;
readonly translation: TranslationService;
readonly credits: CreditService;
readonly speaker: SpeakerService;
readonly denoiser: DenoiserService;
readonly stemExtraction: StemExtractionService;
constructor(config: AudioPodConfig);
private createHttpClient;
private handleApiError;
/**
* Make a GET request to the API
*/
get<T = any>(endpoint: string, params?: Record<string, any>): Promise<T>;
/**
* Make a POST request to the API
*/
post<T = any>(endpoint: string, data?: any, config?: any): Promise<T>;
/**
* Make a PUT request to the API
*/
put<T = any>(endpoint: string, data?: any): Promise<T>;
/**
* Make a DELETE request to the API
*/
delete<T = any>(endpoint: string): Promise<T>;
/**
* Upload a file to the API
*/
uploadFile<T = any>(endpoint: string, fileData: File | Blob, formData?: Record<string, any>, onProgress?: (progress: UploadProgress) => void): Promise<T>;
/**
* Wait for a job to complete
*/
waitForJobCompletion<T = any>(jobId: number, timeout?: number, // default: 5 minutes
pollInterval?: number, // default: 5 seconds
onProgress?: (job: Job) => void): Promise<T>;
/**
* Get job status
*/
getJobStatus(jobId: number): Promise<Job>;
/**
* Cancel a job
*/
cancelJob(jobId: number): Promise<void>;
/**
* Check API health
*/
checkHealth(): Promise<{
status: string;
timestamp: string;
}>;
/**
* Get current user information
*/
getUserInfo(): Promise<any>;
/**
* Get API configuration
*/
getConfig(): Readonly<Required<AudioPodConfig>>;
/**
* Update API key
*/
updateApiKey(apiKey: string): void;
/**
* Set debug mode
*/
setDebug(enabled: boolean): void;
}
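/*
 * Usage sketch (illustrative): using the low-level client helpers directly:
 * a health check, then submitting through a service and waiting with a
 * progress callback. Timeout and poll interval are assumed to be milliseconds.
 */
import { AudioPodClient } from 'audiopod-sdk';

async function lowLevelExample(client: AudioPodClient): Promise<void> {
  const health = await client.checkHealth();
  console.log('API status:', health.status);

  const { job } = await client.music.generateMusic({ prompt: 'solo piano, slow waltz' });
  const result = await client.waitForJobCompletion(
    job.id,
    10 * 60 * 1000,                                   // give up after 10 minutes
    5_000,                                            // poll every 5 seconds
    (j) => console.log(`progress: ${j.progress}%`),
  );
  console.log(result);
}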
/**
* React Hooks for AudioPod SDK
* Easy-to-use React hooks for AudioPod functionality
*/
/**
* Base AudioPod hook
*/
declare function useAudioPod(apiKey?: string, options?: UseAudioPodOptions): {
client: AudioPodClient | null;
isInitialized: boolean;
error: AudioPodError | null;
};
/**
* Voice cloning hook
*/
declare function useVoiceCloning(apiKey?: string, options?: UseVoiceCloningOptions): {
cloneVoice: (request: VoiceCloneRequest) => Promise<void>;
cancel: () => void;
isLoading: boolean;
progress: number;
result: VoiceCloneResult | null;
error: AudioPodError | null;
isInitialized: boolean;
};
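/*
 * Usage sketch (illustrative): a React component built on useVoiceCloning.
 * Assumes a .tsx module with React available; the environment variable name
 * is a placeholder.
 */
import React from 'react';
import { useVoiceCloning } from 'audiopod-sdk';

export function VoiceClonePanel({ file }: { file: File }) {
  const { cloneVoice, isLoading, progress, result, error } = useVoiceCloning(
    process.env.REACT_APP_AUDIOPOD_API_KEY,
    { pollInterval: 3000 },
  );

  return (
    <div>
      <button
        disabled={isLoading}
        onClick={() => cloneVoice({ voiceFile: file, text: 'Hello from the browser!' })}
      >
        {isLoading ? `Cloning… ${progress}%` : 'Clone voice'}
      </button>
      {result?.outputUrl && <audio controls src={result.outputUrl} />}
      {error && <p role="alert">{error.message}</p>}
    </div>
  );
}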
/**
* Music generation hook
*/
declare function useMusicGeneration(apiKey?: string, options?: UseMusicGenerationOptions): {
generateMusic: (request: MusicGenerationRequest) => Promise<void>;
cancel: () => void;
isLoading: boolean;
progress: number;
result: MusicGenerationResult | null;
error: AudioPodError | null;
isInitialized: boolean;
};
/**
* Transcription hook
*/
declare function useTranscription(apiKey?: string, options?: UseTranscriptionOptions): {
transcribeAudio: (request: TranscriptionRequest) => Promise<void>;
cancel: () => void;
isLoading: boolean;
progress: number;
result: TranscriptionResult | null;
error: AudioPodError | null;
isInitialized: boolean;
};
/**
* Credits hook
*/
declare function useCredits(apiKey?: string): {
credits: CreditInfo | null;
isLoading: boolean;
error: AudioPodError | null;
refetch: () => Promise<void>;
isInitialized: boolean;
};
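/*
 * Usage sketch (illustrative): a small credit-balance badge using useCredits.
 * Assumes a .tsx module; the environment variable name is a placeholder.
 */
import React from 'react';
import { useCredits } from 'audiopod-sdk';

export function CreditsBadge() {
  const { credits, isLoading, error, refetch } = useCredits(process.env.REACT_APP_AUDIOPOD_API_KEY);

  if (isLoading) return <span>Loading credits…</span>;
  if (error) return <span role="alert">{error.message}</span>;
  return (
    <span onClick={() => refetch()}>
      {credits?.totalAvailableCredits ?? 0} credits available
    </span>
  );
}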
export { AudioPodClient, JobStatus, TTSProvider, VoiceType, useAudioPod, useCredits, useMusicGeneration, useTranscription, useVoiceCloning };
export type { ApiResponse, Audio2AudioRequest, AudioPodConfig, AudioPodError, AudioPodEvent, CreditInfo, CreditUsageRecord, EditRequest, ExtendRequest, InstrumentalGenerationRequest, Job, JobProgress, JobStatusEvent, MultiVoiceSegment, MultiVoiceTTSRequest, MultiVoiceTTSResult, MusicGenerationRequest, MusicGenerationResult, ProgressEvent, PublicPreviewRequest, PublicPreviewResult, RapGenerationRequest, RepaintRequest, RetakeRequest, SamplesGenerationRequest, SongBloomRequest, StemExtractionRequest, StemExtractionResult, TranscriptionRequest, TranscriptionResult, TranscriptionSegment, TranslationRequest, TranslationResult, UploadProgress, UseAudioPodOptions, UseMusicGenerationOptions, UseTranscriptionOptions, UseVoiceCloningOptions, VocalsGenerationRequest, VoiceCloneRequest, VoiceCloneResult, VoiceCollection, VoiceCollectionRequest, VoiceCollectionResult, VoiceConversionRequest, VoiceConversionResult, VoiceProfile };