@tanstack/ai
Version:
Core TanStack AI library - Open source AI SDK
225 lines (202 loc) • 6.68 kB
text/typescript
/**
* Audio Generation Activity
*
* Generates audio (music, sound effects, etc.) from text prompts.
* This is a self-contained module with implementation, types, and JSDoc.
*/
import { aiEventClient } from '@tanstack/ai-event-client'
import { streamGenerationResult } from '../stream-generation-result.js'
import { resolveDebugOption } from '../../logger/resolve'
import type { InternalLogger } from '../../logger/internal-logger'
import type { DebugOption } from '../../logger/types'
import type { AudioAdapter } from './adapter'
import type { AudioGenerationResult, StreamChunk } from '../../types'
// ===========================
// Activity Kind
// ===========================
/**
 * Discriminator for the audio activity. Adapters passed to this module's
 * options must carry this exact literal as their `kind`.
 */
export const kind: 'audio' = 'audio'
// ===========================
// Type Extraction Helpers
// ===========================
/**
* Extract provider options from an AudioAdapter via ~types.
*
* Resolves to `TAdapter['~types']['providerOptions']` when `TAdapter` is
* assignable to `AudioAdapter<any, any>`, and falls back to `object` for any
* other type.
*
* @template TAdapter - The type to inspect; intentionally unconstrained so the
* helper can be used inside the constraint of other generics in this module.
*/
export type AudioProviderOptions<TAdapter> =
// `any, any` matches every AudioAdapter regardless of its own type arguments.
TAdapter extends AudioAdapter<any, any>
? TAdapter['~types']['providerOptions']
: object
// ===========================
// Activity Options Type
// ===========================
/**
* Options for the audio generation activity.
* The model is extracted from the adapter's model property.
*
* @template TAdapter - The audio adapter type; it also determines the type
* accepted by `modelOptions` through `AudioProviderOptions<TAdapter>`
* @template TStream - Whether to stream the output (defaults to `false`)
*/
export interface AudioActivityOptions<
TAdapter extends AudioAdapter<string, AudioProviderOptions<TAdapter>>,
TStream extends boolean = false,
> {
/**
* The audio adapter to use (must be created with a model).
* Its `kind` must be the `'audio'` literal exported by this module.
*/
adapter: TAdapter & { kind: typeof kind }
/** Text description of the desired audio */
prompt: string
/** Desired duration in seconds */
duration?: number
/**
* Provider-specific options for audio generation.
* The accepted shape is derived from the adapter type.
*/
modelOptions?: AudioProviderOptions<TAdapter>
/**
* Whether to stream the generation result.
* When true, returns an AsyncIterable<StreamChunk> for streaming transport.
* When false or not provided, returns a Promise<AudioGenerationResult>.
*
* @default false
*/
stream?: TStream
/**
* Enable debug logging. Pass `true` to enable all categories, `false` to
* silence everything including errors, or a `DebugConfig` object for granular
* control and/or a custom `Logger`.
*/
debug?: DebugOption
}
// ===========================
// Activity Result Type
// ===========================
/**
* Result type for the audio generation activity.
* - If stream is true: AsyncIterable<StreamChunk>
* - Otherwise: Promise<AudioGenerationResult>
*
* Because the conditional is applied to a bare type parameter, passing the
* wide `boolean` type yields the union of both result types.
*
* @template TStream - The literal type of the `stream` option (defaults to `false`)
*/
export type AudioActivityResult<TStream extends boolean = false> =
TStream extends true
? AsyncIterable<StreamChunk>
: Promise<AudioGenerationResult>
/**
 * Build a request identifier of the form `<prefix>-<epoch ms>-<random>`.
 *
 * The random part is taken from the base-36 rendering of `Math.random()` and
 * is at most seven characters long. It is not cryptographically secure.
 *
 * @param prefix - Label placed at the start of the identifier
 * @returns The assembled identifier
 */
function createId(prefix: string): string {
  const timestamp = Date.now()
  // Skip the leading "0." of the base-36 string and keep up to seven characters.
  const randomSuffix = Math.random().toString(36).slice(2, 9)
  return [prefix, timestamp, randomSuffix].join('-')
}
// ===========================
// Activity Implementation
// ===========================
/**
 * Generate audio (music, sound effects, and similar content) from a text
 * prompt using the supplied adapter.
 *
 * When `options.stream` is truthy the generation run is handed to
 * `streamGenerationResult` as a deferred callback and that helper's return
 * value is returned. Otherwise the generation starts immediately and its
 * promise is returned.
 *
 * @param options - Adapter, prompt, and optional duration / provider options
 * @returns A promise of the generation result, or the streaming wrapper when
 * `stream` is enabled
 *
 * @example Generate music from a prompt
 * ```ts
 * import { generateAudio } from '@tanstack/ai'
 * import { falAudio } from '@tanstack/ai-fal'
 *
 * const result = await generateAudio({
 *   adapter: falAudio('fal-ai/diffrhythm'),
 *   prompt: 'An upbeat electronic track with synths',
 *   duration: 10
 * })
 *
 * console.log(result.audio.url) // URL to generated audio
 * ```
 */
export function generateAudio<
  TAdapter extends AudioAdapter<string, AudioProviderOptions<TAdapter>>,
  TStream extends boolean = false,
>(
  options: AudioActivityOptions<TAdapter, TStream>,
): AudioActivityResult<TStream> {
  // One thunk serves both paths so the generation is started in a single place.
  const run = () => runGenerateAudio(options)

  if (!options.stream) {
    return run() as AudioActivityResult<TStream>
  }

  return streamGenerationResult(run) as AudioActivityResult<TStream>
}
/**
 * Reduce an arbitrary thrown value to the `{ message, name }` pair carried by
 * the `audio:request:error` event.
 *
 * JavaScript allows any value to be thrown, so a rejection coming out of an
 * adapter is not guaranteed to be an `Error`; it may even be `null` or
 * `undefined`, on which a property read would itself throw.
 */
function describeThrownValue(thrown: unknown): {
  message: string
  name: string
} {
  if (typeof thrown === 'object' && thrown !== null) {
    // Covers real Error instances as well as error-like objects.
    const { message, name } = thrown as { message?: unknown; name?: unknown }
    return {
      message: typeof message === 'string' ? message : 'Unknown error',
      name: typeof name === 'string' ? name : 'Error',
    }
  }
  return { message: String(thrown), name: 'Error' }
}

/**
 * Run the core audio generation logic (non-streaming).
 *
 * Emits `audio:request:started` before calling the adapter, then either
 * `audio:request:completed` or `audio:request:error`, and mirrors each step
 * on the resolved debug logger.
 *
 * @param options - Activity options; `adapter`, `stream`, and `debug` are
 * consumed here and everything else is forwarded to `adapter.generateAudio`
 * together with the adapter's `model` and the resolved logger
 * @returns The result produced by the adapter, unmodified
 * @throws Re-throws whatever the adapter threw (the original value, untouched)
 * after the error event has been emitted and the failure logged
 */
async function runGenerateAudio<
  TAdapter extends AudioAdapter<string, AudioProviderOptions<TAdapter>>,
>(
  options: AudioActivityOptions<TAdapter, boolean>,
): Promise<AudioGenerationResult> {
  // `stream` is pulled out only to keep it from being forwarded to the adapter.
  const { adapter, stream: _stream, debug, ...rest } = options
  const model = adapter.model
  const requestId = createId('audio')
  const startTime = Date.now()
  const logger: InternalLogger = resolveDebugOption(debug)

  // Log label: prefer an explicit `provider`, then the adapter's `name`.
  const named = adapter as { name?: string; provider?: string }
  const providerName = named.provider ?? named.name ?? 'unknown'

  // Event payloads carry the provider options as a plain record.
  const modelOptions = rest.modelOptions as Record<string, unknown> | undefined

  aiEventClient.emit('audio:request:started', {
    requestId,
    provider: adapter.name,
    model,
    prompt: rest.prompt,
    duration: rest.duration,
    modelOptions,
    timestamp: startTime,
  })
  logger.request(`activity=generateAudio provider=${providerName}`, {
    provider: providerName,
    model,
  })

  try {
    const result = await adapter.generateAudio({ ...rest, model, logger })
    const elapsedMs = Date.now() - startTime

    aiEventClient.emit('audio:request:completed', {
      requestId,
      provider: adapter.name,
      model,
      audio: result.audio,
      // In completion/error events `duration` is elapsed wall-clock time in ms.
      duration: elapsedMs,
      modelOptions,
      timestamp: Date.now(),
    })
    logger.output(`activity=generateAudio provider=${providerName}`, {
      contentType: result.audio.contentType,
      audioDuration: result.audio.duration,
    })

    return result
  } catch (error: unknown) {
    const elapsedMs = Date.now() - startTime

    aiEventClient.emit('audio:request:error', {
      requestId,
      provider: adapter.name,
      model,
      // Normalised so a non-Error rejection cannot make this handler throw.
      error: describeThrownValue(error),
      duration: elapsedMs,
      modelOptions,
      timestamp: Date.now(),
    })
    logger.errors('generateAudio activity failed', {
      error,
      source: 'generateAudio',
    })

    throw error
  }
}
// ===========================
// Options Factory
// ===========================
/**
 * Identity helper for building `generateAudio()` options ahead of time.
 *
 * Nothing is executed or copied: the same object is handed back, so the only
 * effect is that the literal is type-checked against the chosen adapter.
 *
 * @param options - The options object to type-check
 * @returns The very same `options` object
 */
export function createAudioOptions<
  TAdapter extends AudioAdapter<string, AudioProviderOptions<TAdapter>>,
  TStream extends boolean = false,
>(
  options: AudioActivityOptions<TAdapter, TStream>,
): AudioActivityOptions<TAdapter, TStream> {
  return options
}
// Re-export the adapter types from './adapter' so they can be imported from
// this module alongside generateAudio. These are type-only exports.
export type {
AudioAdapter,
AudioAdapterConfig,
AnyAudioAdapter,
} from './adapter'
// BaseAudioAdapter is re-exported as a runtime value, so it cannot be part of
// the type-only export above.
export { BaseAudioAdapter } from './adapter'