/**
 * @mixio-pro/kalaasetu-mcp
 * Version: (unspecified)
 * A powerful Model Context Protocol server providing AI tools for content generation and analysis
 * 161 lines (148 loc) • 4.84 kB • text/typescript
 */
import { z } from "zod";
import { callFalModel } from "../utils/fal.utils";
/**
 * Convert an audio duration into a video frame count at 25 FPS.
 * A one-second buffer is appended so the video fully covers the audio,
 * and the result is clamped to the model's supported range of 129-401 frames.
 */
function calculateFramesFromAudioDuration(
  audioDurationSeconds: number
): number {
  const FPS = 25;
  const BUFFER_SECONDS = 1;
  const MIN_FRAMES = 129;
  const MAX_FRAMES = 401;

  const rawFrames = Math.round((audioDurationSeconds + BUFFER_SECONDS) * FPS);
  if (rawFrames < MIN_FRAMES) return MIN_FRAMES;
  if (rawFrames > MAX_FRAMES) return MAX_FRAMES;
  return rawFrames;
}
/**
 * FAL AI Hunyuan Avatar - High-Fidelity Audio-Driven Human Animation
 *
 * MCP tool definition. `parameters` is the zod schema advertised to clients;
 * `execute` derives a frame count from the audio duration when one is not
 * given, validates ranges, calls the fal-ai/hunyuan-avatar model via
 * `callFalModel`, and returns a JSON string describing the generated video.
 */
export const hunyuanAvatar = {
  name: "hunyuan_avatar",
  description:
    "Generate high-fidelity audio-driven human animation videos using FAL AI Hunyuan Avatar. Creates realistic talking avatar animations from an image and audio file.",
  parameters: z.object({
    image_url: z
      .string()
      .describe("Public URL of the reference image for the avatar."),
    audio_url: z
      .string()
      .describe("Public URL of the audio file to drive the animation."),
    audio_duration_seconds: z
      .number()
      .optional()
      .describe(
        "Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."
      ),
    text: z
      .string()
      .optional()
      .describe(
        "Text prompt describing the scene. Default: 'A cat is singing.'"
      ),
    num_frames: z
      .number()
      .optional()
      .describe(
        "Number of video frames to generate at 25 FPS. Range: 129 to 401. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 129"
      ),
    num_inference_steps: z
      .number()
      .optional()
      .describe(
        "Number of inference steps for sampling. Higher values give better quality but take longer. Range: 30 to 50. Default: 30"
      ),
    turbo_mode: z
      .boolean()
      .optional()
      .describe(
        "If true, the video will be generated faster with no noticeable degradation in visual quality. Default: true"
      ),
    seed: z.number().optional().describe("Random seed for generation."),
    fal_key: z
      .string()
      .optional()
      .describe(
        "FAL API key. If not provided, will use FAL_KEY environment variable."
      ),
  }),
  /**
   * Run the Hunyuan Avatar generation.
   *
   * @param args - Validated tool arguments (see `parameters` schema).
   * @returns JSON string with a `videos` array, a `message`, and the FAL `requestId`.
   * @throws Error when a range check fails or the response has no video URL.
   */
  execute: async (args: {
    image_url: string;
    audio_url: string;
    audio_duration_seconds?: number;
    text?: string;
    num_frames?: number;
    num_inference_steps?: number;
    turbo_mode?: boolean;
    seed?: number;
    fal_key?: string;
  }): Promise<string> => {
    // An explicit num_frames wins; otherwise derive one from the audio
    // duration (duration + 1s buffer at 25 FPS, clamped to 129-401).
    let calculatedFrames = args.num_frames;
    if (
      args.audio_duration_seconds !== undefined &&
      args.num_frames === undefined
    ) {
      calculatedFrames = calculateFramesFromAudioDuration(
        args.audio_duration_seconds
      );
    }

    // Validate num_frames range if provided (model accepts 129-401).
    if (
      calculatedFrames !== undefined &&
      (calculatedFrames < 129 || calculatedFrames > 401)
    ) {
      throw new Error("num_frames must be between 129 and 401");
    }

    // Validate num_inference_steps range if provided (model accepts 30-50).
    if (
      args.num_inference_steps !== undefined &&
      (args.num_inference_steps < 30 || args.num_inference_steps > 50)
    ) {
      throw new Error("num_inference_steps must be between 30 and 50");
    }

    // Build the request payload. Typed precisely (rather than `any`) so the
    // compiler catches key typos; optional fields are included only when
    // supplied so the model's own defaults apply otherwise.
    const input: {
      image_url: string;
      audio_url: string;
      text?: string;
      num_frames?: number;
      num_inference_steps?: number;
      turbo_mode?: boolean;
      seed?: number;
    } = {
      image_url: args.image_url,
      audio_url: args.audio_url,
    };
    if (args.text !== undefined) {
      input.text = args.text;
    }
    if (calculatedFrames !== undefined) {
      input.num_frames = calculatedFrames;
    }
    if (args.num_inference_steps !== undefined) {
      input.num_inference_steps = args.num_inference_steps;
    }
    if (args.turbo_mode !== undefined) {
      input.turbo_mode = args.turbo_mode;
    }
    if (args.seed !== undefined) {
      input.seed = args.seed;
    }

    const result = await callFalModel("fal-ai/hunyuan-avatar", input, {
      falKey: args.fal_key,
    });

    // A completed response must carry a downloadable video URL.
    const videoData = result.data?.video;
    if (!videoData || !videoData.url) {
      throw new Error(
        `No video data in completed response: ${JSON.stringify(result.data)}`
      );
    }
    const videoUrl = videoData.url;
    // Fall back to a generic name when the API omits (or blanks) file_name.
    const fileName = videoData.file_name || "hunyuan_avatar.mp4";

    return JSON.stringify({
      videos: [
        {
          url: videoUrl,
          filename: fileName,
          mimeType: "video/mp4",
          filesize: videoData.file_size,
        },
      ],
      message: "Hunyuan Avatar video generated successfully",
      requestId: result.requestId,
    });
  },
};