/**
 * @mixio-pro/kalaasetu-mcp
 *
 * A powerful Model Context Protocol server providing AI tools for content
 * generation and analysis.
 */
import { z } from "zod";
import { callFalModel } from "../utils/fal.utils";
/**
 * Convert an audio duration into a video frame count at 25 FPS.
 *
 * A 1-second buffer is appended so the generated video fully covers the
 * audio, and the result is clamped to the model's supported frame range
 * (41–721 frames).
 *
 * @param audioDurationSeconds - Length of the audio clip, in seconds.
 * @returns Frame count in the inclusive range [41, 721].
 */
function calculateFramesFromAudioDuration(
  audioDurationSeconds: number
): number {
  const FPS = 25;
  const BUFFER_SECONDS = 1; // ensure complete audio coverage
  const MIN_FRAMES = 41;
  const MAX_FRAMES = 721;
  const rawFrames = Math.round((audioDurationSeconds + BUFFER_SECONDS) * FPS);
  return Math.min(MAX_FRAMES, Math.max(MIN_FRAMES, rawFrames));
}
/**
* FAL AI Infinitalk - Generate talking avatar video from image and audio
*/
export const infinitalk = {
name: "infinitalk",
description:
"Generate a talking avatar video from an image and audio file using FAL AI Infinitalk. The avatar lip-syncs to the provided audio with natural facial expressions.",
parameters: z.object({
image_url: z
.string()
.describe(
"Public URL of the input image. If the input image does not match the chosen aspect ratio, it is resized and center cropped."
),
audio_url: z
.string()
.describe("The Public URL of the audio file for lip-sync generation."),
audio_duration_seconds: z
.number()
.optional()
.describe(
"Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."
),
prompt: z
.string()
.describe(
"The text prompt to guide video generation (e.g., 'A woman with colorful hair talking on a podcast')"
),
num_frames: z
.number()
.optional()
.describe(
"Number of frames to generate. Must be between 41 to 721. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 145"
),
resolution: z
.enum(["480p", "720p"])
.optional()
.describe("Resolution of the video to generate. Default: '480p'"),
seed: z
.number()
.optional()
.describe(
"Random seed for reproducibility. If not provided, a random seed is chosen. Default: 42"
),
acceleration: z
.enum(["none", "regular", "high"])
.optional()
.describe(
"The acceleration level to use for generation. Default: 'regular'"
),
fal_key: z
.string()
.optional()
.describe(
"FAL API key. If not provided, will use FAL_KEY environment variable."
),
}),
execute: async (args: {
image_url: string;
audio_url: string;
audio_duration_seconds?: number;
prompt: string;
num_frames?: number;
resolution?: "480p" | "720p";
seed?: number;
acceleration?: "none" | "regular" | "high";
fal_key?: string;
}) => {
// Calculate frames from audio duration if provided and num_frames not specified
let calculatedFrames = args.num_frames;
if (
args.audio_duration_seconds !== undefined &&
args.num_frames === undefined
) {
calculatedFrames = calculateFramesFromAudioDuration(
args.audio_duration_seconds
);
}
// Validate num_frames range if provided
if (
calculatedFrames !== undefined &&
(calculatedFrames < 41 || calculatedFrames > 721)
) {
throw new Error("num_frames must be between 41 and 721");
}
// Build input payload
const input: any = {
image_url: args.image_url,
audio_url: args.audio_url,
prompt: args.prompt,
};
// Add optional parameters if provided
if (calculatedFrames !== undefined) {
input.num_frames = calculatedFrames;
}
input.resolution = args.resolution || "480p";
if (args.seed !== undefined) {
input.seed = args.seed;
}
if (args.acceleration !== undefined) {
input.acceleration = args.acceleration;
}
const result = await callFalModel("fal-ai/infinitalk", input, {
falKey: args.fal_key,
});
// Extract video data from the response
const videoData = result.data?.video;
const seed = result.data?.seed;
if (!videoData || !videoData.url) {
throw new Error(
`No video data in completed response: ${JSON.stringify(result.data)}`
);
}
const videoUrl = videoData.url;
const fileName = videoData.file_name || "infinitalk.mp4";
return JSON.stringify({
videos: [
{
url: videoUrl,
filename: fileName,
mimeType: "video/mp4",
filesize: videoData.file_size,
},
],
message: "Infinitalk video generated successfully",
seed: seed,
requestId: result.requestId,
});
},
};