/**
 * @mixio-pro/kalaasetu-mcp
 *
 * A powerful Model Context Protocol server providing AI tools for content
 * generation and analysis.
 */
import { z } from "zod";
import { callFalModel } from "../utils/fal.utils";
/**
 * Convert an audio duration into a video frame count at 25 FPS.
 *
 * A 1-second buffer is appended so the generated video fully covers the
 * audio, and the result is clamped to the model's supported frame range
 * (41–721 frames).
 *
 * @param audioDurationSeconds - Length of the audio clip, in seconds.
 * @returns Frame count in the inclusive range [41, 721].
 */
function calculateFramesFromAudioDuration(
  audioDurationSeconds: number
): number {
  const FPS = 25;
  const BUFFER_SECONDS = 1; // ensure complete audio coverage
  const MIN_FRAMES = 41;
  const MAX_FRAMES = 721;
  const rawFrames = Math.round((audioDurationSeconds + BUFFER_SECONDS) * FPS);
  return Math.min(MAX_FRAMES, Math.max(MIN_FRAMES, rawFrames));
}
/**
* FAL AI Infinitalk - Generate talking avatar video from image and audio
*/
export const infinitalk = {
name: "infinitalk",
description:
"Generate a talking avatar video from an image and audio file using FAL AI Infinitalk. The avatar lip-syncs to the provided audio with natural facial expressions.",
parameters: z.object({
image_url: z
.string()
.describe(
"Public URL of the input image. If the input image does not match the chosen aspect ratio, it is resized and center cropped."
),
audio_url: z
.string()
.describe("The Public URL of the audio file for lip-sync generation."),
audio_duration_seconds: z
.number()
.optional()
.describe(
"Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."
),
prompt: z
.string()
.describe(
"The text prompt to guide video generation (e.g., 'A woman with colorful hair talking on a podcast')"
),
num_frames: z
.number()
.optional()
.describe(
"Number of frames to generate. Must be between 41 to 721. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 145"
),
resolution: z
.enum(["480p", "720p"])
.optional()
.describe("Resolution of the video to generate. Default: '480p'"),
seed: z
.number()
.optional()
.describe(
"Random seed for reproducibility. If not provided, a random seed is chosen. Default: 42"
),
acceleration: z
.enum(["none", "regular", "high"])
.optional()
.describe(
"The acceleration level to use for generation. Default: 'regular'"
),
fal_key: z
.string()
.optional()
.describe(
"FAL API key. If not provided, will use FAL_KEY environment variable."
),
}),
execute: async (args: {
image_url: string;
audio_url: string;
audio_duration_seconds?: number;
prompt: string;
num_frames?: number;
resolution?: "480p" | "720p";
seed?: number;
acceleration?: "none" | "regular" | "high";
fal_key?: string;
}) => {
// Calculate frames from audio duration if provided and num_frames not specified
let calculatedFrames = args.num_frames;
if (
args.audio_duration_seconds !== undefined &&
args.num_frames === undefined
) {
calculatedFrames = calculateFramesFromAudioDuration(
args.audio_duration_seconds
);
}
// Validate num_frames range if provided
if (
calculatedFrames !== undefined &&
(calculatedFrames < 41 || calculatedFrames > 721)
) {
throw new Error("num_frames must be between 41 and 721");
}
// Build input payload
const input: any = {
image_url: args.image_url,
audio_url: args.audio_url,
prompt: args.prompt,
};
// Add optional parameters if provided
if (calculatedFrames !== undefined) {
input.num_frames = calculatedFrames;
}
input.resolution = args.resolution || "480p";
if (args.seed !== undefined) {
input.seed = args.seed;
}
if (args.acceleration !== undefined) {
input.acceleration = args.acceleration;
}
const result = await callFalModel("fal-ai/infinitalk", input, {
falKey: args.fal_key,
});
// Extract video data from the response
const videoData = result.data?.video;
const seed = result.data?.seed;
if (!videoData || !videoData.url) {
throw new Error(
`No video data in completed response: ${JSON.stringify(result.data)}`
);
}
const videoUrl = videoData.url;
const fileName = videoData.file_name || "infinitalk.mp4";
return JSON.stringify({
videos: [
{
url: videoUrl,
filename: fileName,
mimeType: "video/mp4",
filesize: videoData.file_size,
},
],
message: "Infinitalk video generated successfully",
seed: seed,
requestId: result.requestId,
});
},
};