UNPKG

@mixio-pro/kalaasetu-mcp

Version:

A powerful Model Context Protocol server providing AI tools for content generation and analysis

356 lines (330 loc) 11 kB
import * as fs from "fs";
import { GoogleAuth } from "google-auth-library";
import { exec } from "child_process";
import * as path from "path";
import { z } from "zod";
import { getStorage } from "../storage";
import { generateTimestampedFilename } from "../utils/filename";

/** Delay between two polls of the long-running operation. */
const POLL_INTERVAL_MS = 10000;
/** Maximum number of polls before the operation is considered timed out. */
const MAX_POLL_ATTEMPTS = 60;
/** Only the first N reference images are sent with the request. */
const MAX_REFERENCE_IMAGES = 3;
/** MIME type used when the file extension is missing or unrecognized. */
const DEFAULT_IMAGE_MIME_TYPE = "image/png";
/** Known image extensions (lower-case, with leading dot) and their MIME types. */
const IMAGE_MIME_BY_EXTENSION: Record<string, string> = {
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
};

/** One entry of the `videos` array in a finished operation's response. */
interface VeoVideo {
  bytesBase64Encoded?: unknown;
}

/** The part of the operation result this module reads. */
interface VeoResult {
  videos?: Array<VeoVideo | null | undefined>;
}

/** Long-running operation envelope returned by predictLongRunning / fetchPredictOperation. */
interface VeoOperation extends VeoResult {
  name?: string;
  operation?: string;
  done?: boolean;
  error?: unknown;
  response?: VeoResult;
}

/** Resolves after `ms` milliseconds. */
async function wait(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Obtains a Google Cloud access token.
 *
 * Tries GoogleAuth (Application Default Credentials) first and, if that
 * fails for any reason, falls back to `gcloud auth print-access-token`.
 *
 * @returns A non-empty bearer token.
 * @throws Error when both ADC and the gcloud CLI fail to yield a token.
 */
async function fetchAccessToken(): Promise<string> {
  let adcFailure: string;
  try {
    const auth = new GoogleAuth({
      scopes: ["https://www.googleapis.com/auth/cloud-platform"],
    });
    const client = await auth.getClient();
    // The auth client resolves to `{ token, res }` rather than a bare string;
    // accept either shape so the ADC path can actually succeed.
    const result: unknown = await client.getAccessToken();
    const token =
      typeof result === "string"
        ? result
        : (result as { token?: string | null } | null | undefined)?.token;
    if (typeof token !== "string" || !token) {
      throw new Error("No token from GoogleAuth");
    }
    return token;
  } catch (e: unknown) {
    // Remember why ADC failed so the final error is actionable.
    adcFailure = e instanceof Error ? e.message : String(e);
  }

  // Fallback to gcloud
  return await new Promise<string>((resolve, reject) => {
    exec("gcloud auth print-access-token", (err, stdout, stderr) => {
      if (err) {
        reject(
          new Error(
            `Failed to fetch an access token (ADC and gcloud): ${
              stderr || err.message
            } (ADC error: ${adcFailure})`
          )
        );
        return;
      }
      const t = (stdout || "").trim();
      if (!t) {
        reject(
          new Error("Failed to fetch an access token: empty token from gcloud")
        );
        return;
      }
      resolve(t);
    });
  });
}

/**
 * Reads a file through the configured storage backend and base64-encodes it.
 *
 * The MIME type is derived from the file extension (PNG / JPEG); anything
 * else falls back to `image/png`.
 *
 * @param filePath Path handed to the storage backend.
 * @returns The base64 payload and its MIME type.
 * @throws Error when the storage backend reports that the file does not exist.
 */
async function fileToBase64(
  filePath: string
): Promise<{ data: string; mimeType: string }> {
  const storage = getStorage();

  const exists = await storage.exists(filePath);
  if (!exists) {
    // Include path diagnostics in the error to make relative-path mistakes obvious.
    const isAbsolute = path.isAbsolute(filePath);
    const resolvedPath = isAbsolute
      ? filePath
      : path.resolve(process.cwd(), filePath);
    throw new Error(
      `File not found: ${filePath}\n` +
        `Resolved path: ${resolvedPath}\n` +
        `Is absolute: ${isAbsolute}\n` +
        `CWD: ${process.cwd()}`
    );
  }

  const buf = await storage.readFile(filePath);
  const data = Buffer.from(buf).toString("base64");
  const mimeType =
    IMAGE_MIME_BY_EXTENSION[path.extname(filePath).toLowerCase()] ??
    DEFAULT_IMAGE_MIME_TYPE;
  return { data, mimeType };
}

/**
 * Normalizes the `reference_images` argument into a list of paths.
 *
 * Accepts a real array, a JSON-encoded array string (`'["a.png","b.png"]'`),
 * or a single path string.
 *
 * @throws Error when the value is neither array nor string, or when a
 *   JSON-looking string does not decode to an array of strings.
 */
function parseReferenceImagePaths(
  raw: string[] | string | undefined
): string[] {
  if (!raw) return [];
  if (Array.isArray(raw)) return raw;
  if (typeof raw !== "string") {
    throw new Error("Invalid reference_images: must be array or string");
  }
  if (!(raw.startsWith("[") && raw.endsWith("]"))) {
    return [raw];
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    throw new Error("Invalid reference_images format");
  }
  if (!Array.isArray(parsed) || parsed.some((p) => typeof p !== "string")) {
    throw new Error("Invalid reference_images format");
  }
  return parsed as string[];
}

/**
 * Returns the path for the `index`-th video of a request.
 *
 * The first video uses `outputPath` unchanged; later ones get `_<index>`
 * inserted before the extension (or appended when there is none), so that
 * multiple predictions never overwrite each other.
 */
function indexedOutputPath(outputPath: string, index: number): string {
  if (index === 0) return outputPath;
  const ext = path.extname(outputPath);
  const stem = ext ? outputPath.slice(0, -ext.length) : outputPath;
  return `${stem}_${index}${ext}`;
}

/**
 * MCP tool: image-to-video generation with Vertex Veo.
 *
 * `execute` submits a `predictLongRunning` request, polls
 * `fetchPredictOperation` every 10 s (up to 60 times), decodes every
 * `videos[].bytesBase64Encoded` entry of the result and writes it through the
 * storage backend.
 *
 * Returns a JSON string `{ videos, message }` on success, or a short
 * diagnostic string when the operation finished without any inline video.
 * Throws on HTTP failures, on an operation-level error, and on poll timeout.
 */
export const imageToVideo = {
  name: "generateVideoi2v",
  description:
    "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
  parameters: z.object({
    prompt: z.string().describe("Text description for the video"),
    image_path: z
      .string()
      .optional()
      .describe("Path to source image for image-to-video generation"),
    last_frame_path: z
      .string()
      .optional()
      .describe("Path to last frame image to guide ending frame (optional)"),
    aspect_ratio: z
      .string()
      .optional()
      .describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
    duration_seconds: z
      .string()
      .optional()
      .describe("Video duration in seconds: '4', '6', or '8' (default: '6')"),
    resolution: z
      .string()
      .optional()
      .describe("Video resolution: '720p' or '1080p' (default: '720p')"),
    negative_prompt: z
      .string()
      .optional()
      .describe("Text describing what not to include in the video"),
    person_generation: z
      .string()
      .optional()
      .describe(
        "Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"
      ),
    reference_images: z
      .array(z.string())
      .optional()
      .describe("Additional image paths for reference (max 3)"),
    output_path: z
      .string()
      .optional()
      .describe(
        "Output MP4 file path (if multiple predictions, index suffix is added)"
      ),
    project_id: z
      .string()
      .optional()
      .describe("GCP Project ID (default: mixio-pro)"),
    location_id: z
      .string()
      .optional()
      .describe("Vertex region (default: us-central1)"),
    model_id: z
      .string()
      .optional()
      .describe("Model ID (default: veo-3.1-fast-generate-preview)"),
    generate_audio: z
      .boolean()
      .optional()
      .describe(
        "Boolean flag to enable generation of audio along with the video"
      )
      .default(false),
  }),
  async execute(args: {
    prompt: string;
    image_path?: string;
    last_frame_path?: string;
    aspect_ratio?: string;
    duration_seconds?: string;
    resolution?: string;
    negative_prompt?: string;
    person_generation?: string;
    reference_images?: string[] | string;
    output_path?: string;
    project_id?: string;
    location_id?: string;
    model_id?: string;
    generate_audio?: boolean;
  }) {
    // `||` (not `??`) is deliberate for string options: an empty string
    // should fall back to the default just like an omitted value.
    const projectId = args.project_id || "mixio-pro";
    const location = args.location_id || "us-central1";
    const modelId = args.model_id || "veo-3.1-fast-generate-preview";

    const token = await fetchAccessToken();
    const requestHeaders = {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json",
    };

    const modelUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}`;
    const url = `${modelUrl}:predictLongRunning`;
    // Poll using fetchPredictOperation as per Vertex recommendation
    const fetchUrl = `${modelUrl}:fetchPredictOperation`;

    // Build the single request instance; optional image parts are only
    // present when the corresponding argument was supplied.
    const instance: Record<string, unknown> = { prompt: args.prompt };

    if (args.image_path) {
      const { data, mimeType } = await fileToBase64(args.image_path);
      instance.image = { bytesBase64Encoded: data, mimeType };
    }

    if (args.last_frame_path) {
      const { data, mimeType } = await fileToBase64(args.last_frame_path);
      instance.lastFrame = { bytesBase64Encoded: data, mimeType };
    }

    const referencePaths = parseReferenceImagePaths(
      args.reference_images
    ).slice(0, MAX_REFERENCE_IMAGES);
    if (referencePaths.length > 0) {
      instance.referenceImages = await Promise.all(
        referencePaths.map(async (p) => {
          const { data, mimeType } = await fileToBase64(p);
          return {
            image: { bytesBase64Encoded: data, mimeType },
            referenceType: "asset",
          };
        })
      );
    }

    const personGeneration =
      args.person_generation ||
      (args.image_path ? "allow_adult" : "allow_all");

    const parameters = {
      aspectRatio: args.aspect_ratio || "9:16",
      durationSeconds: Number.parseInt(args.duration_seconds || "6", 10) || 6,
      resolution: args.resolution || "720p",
      negativePrompt: args.negative_prompt,
      generateAudio: args.generate_audio ?? false,
      personGeneration,
    };

    const res = await fetch(url, {
      method: "POST",
      headers: requestHeaders,
      body: JSON.stringify({ instances: [instance], parameters }),
    });

    if (!res.ok) {
      const text = await res.text();
      throw new Error(`Vertex request failed: ${res.status} ${text}`);
    }

    const op = (await res.json()) as VeoOperation;
    const name: string = op.name || op.operation || "";
    if (!name) {
      throw new Error(
        "Vertex did not return an operation name for long-running request"
      );
    }

    let current: VeoOperation = op;
    let done = Boolean(op.done);

    for (let tries = 0; !done && tries < MAX_POLL_ATTEMPTS; tries++) {
      await wait(POLL_INTERVAL_MS);
      const poll = await fetch(fetchUrl, {
        method: "POST",
        headers: requestHeaders,
        body: JSON.stringify({ operationName: name }),
      });
      if (!poll.ok) {
        const text = await poll.text();
        throw new Error(`Vertex operation poll failed: ${poll.status} ${text}`);
      }
      current = (await poll.json()) as VeoOperation;
      done = Boolean(current.done) || Boolean(current.response);
    }

    // Running out of polls is a timeout, not a completed operation.
    if (!done) {
      throw new Error(
        `Vertex operation did not complete within ${
          (MAX_POLL_ATTEMPTS * POLL_INTERVAL_MS) / 1000
        } seconds. operationName=${name}`
      );
    }

    // A finished operation may carry an error instead of a response.
    if (current.error) {
      let detail = "";
      try {
        detail = JSON.stringify(current.error);
      } catch {
        detail = String(current.error);
      }
      throw new Error(
        `Vertex operation failed: ${detail}. operationName=${name}`
      );
    }

    const resp: VeoResult = current.response || current;

    // Decode from response.videos[].bytesBase64Encoded only
    const videos: Array<{ url: string; filename: string; mimeType: string }> =
      [];
    const storage = getStorage();

    const saveVideo = async (base64: string, index: number): Promise<void> => {
      if (!base64) return;

      // Use the caller's path (index-suffixed after the first video) or
      // generate a timestamped default name.
      const filePath = args.output_path
        ? indexedOutputPath(args.output_path, index)
        : generateTimestampedFilename(
            `video_output${index > 0 ? `_${index}` : ""}.mp4`
          );

      const buf = Buffer.from(base64, "base64");
      const savedUrl = await storage.writeFile(filePath, buf);
      videos.push({
        url: savedUrl,
        filename: filePath,
        mimeType: "video/mp4",
      });
    };

    if (Array.isArray(resp?.videos)) {
      // Sequential on purpose: keeps `videos` in response order.
      for (let i = 0; i < resp.videos.length; i++) {
        const encoded = resp.videos[i]?.bytesBase64Encoded;
        if (typeof encoded === "string") {
          await saveVideo(encoded, i);
        }
      }
    }

    if (videos.length > 0) {
      return JSON.stringify({
        videos,
        message: "Video(s) generated successfully",
      });
    }

    // If nothing saved, return a concise summary plus head/tail snippets of JSON
    let jsonStr = "";
    try {
      jsonStr = JSON.stringify(resp);
    } catch {
      // Unserializable response: fall through with empty snippets.
    }
    const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
    const tail50 = jsonStr
      ? jsonStr.slice(Math.max(0, jsonStr.length - 50))
      : "";
    return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
  },
};