// @mixio-pro/kalaasetu-mcp
// Version: (unspecified)
// A powerful Model Context Protocol server providing AI tools for content generation and analysis
// 356 lines (330 loc) • 11 kB
// text/typescript
import * as fs from "fs";
import { GoogleAuth } from "google-auth-library";
import { exec } from "child_process";
import * as path from "path";
import { z } from "zod";
import { getStorage } from "../storage";
import { generateTimestampedFilename } from "../utils/filename";
/** Resolve after `ms` milliseconds — a promise wrapper around setTimeout. */
async function wait(ms: number): Promise<void> {
  await new Promise<void>((resolve) => {
    setTimeout(() => resolve(), ms);
  });
}
/**
 * Obtain a GCP access token for the cloud-platform scope.
 *
 * Tries Application Default Credentials first; if that fails (or yields no
 * token), falls back to shelling out to `gcloud auth print-access-token`.
 *
 * @returns A bearer access token string.
 * @throws Error when neither ADC nor the gcloud CLI can produce a token.
 */
async function fetchAccessToken(): Promise<string> {
  try {
    const auth = new GoogleAuth({
      scopes: ["https://www.googleapis.com/auth/cloud-platform"],
    });
    const client = await auth.getClient();
    // Bug fix: client.getAccessToken() returns a GetAccessTokenResponse
    // object ({ token?: string | null }) on current google-auth-library
    // versions, not a bare string — the old `typeof token !== "string"`
    // check rejected every successful ADC result and always fell back to
    // gcloud. Accept both the object and the legacy string shape.
    const res = await client.getAccessToken();
    const token = typeof res === "string" ? res : res?.token;
    if (!token) {
      throw new Error("No token from GoogleAuth");
    }
    return token;
  } catch (e) {
    // Fallback to gcloud
    return await new Promise((resolve, reject) => {
      exec("gcloud auth print-access-token", (err, stdout, stderr) => {
        if (err) {
          reject(
            new Error(
              `Failed to fetch an access token (ADC and gcloud): ${
                stderr || err.message
              }`
            )
          );
          return;
        }
        const t = (stdout || "").trim();
        if (!t) {
          reject(
            new Error(
              "Failed to fetch an access token: empty token from gcloud"
            )
          );
          return;
        }
        resolve(t);
      });
    });
  }
}
/**
 * Read a file through the configured storage backend and return its contents
 * as base64 together with an image MIME type.
 *
 * Fix: the MIME type was previously hard-coded to "image/png" for every
 * file, so JPEG/WebP/GIF inputs were sent to Vertex with a wrong mimeType.
 * It is now inferred from the file extension, keeping "image/png" as the
 * default for unknown extensions (backward compatible).
 *
 * @param filePath Path understood by the storage backend.
 * @returns Base64-encoded file data and the inferred MIME type.
 * @throws Error with diagnostic path info when the file does not exist.
 */
async function fileToBase64(
  filePath: string
): Promise<{ data: string; mimeType: string }> {
  const storage = getStorage();
  // Check if file exists
  const exists = await storage.exists(filePath);
  if (!exists) {
    // Try to provide more helpful error information
    const isAbsolute = path.isAbsolute(filePath);
    const resolvedPath = isAbsolute
      ? filePath
      : path.resolve(process.cwd(), filePath);
    throw new Error(
      `File not found: ${filePath}\n` +
        `Resolved path: ${resolvedPath}\n` +
        `Is absolute: ${isAbsolute}\n` +
        `CWD: ${process.cwd()}`
    );
  }
  const buf = await storage.readFile(filePath);
  const data = Buffer.from(buf).toString("base64");
  // Infer MIME type from the extension; default to PNG when unknown.
  const mimeByExt: Record<string, string> = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".webp": "image/webp",
    ".gif": "image/gif",
  };
  const mimeType = mimeByExt[path.extname(filePath).toLowerCase()] ?? "image/png";
  return { data, mimeType };
}
/**
 * MCP tool: image-to-video generation via Vertex AI Veo models.
 *
 * Flow: build a predictLongRunning request (prompt plus optional first
 * frame, last frame and up to 3 reference images), poll the long-running
 * operation via fetchPredictOperation, then persist any base64-encoded MP4
 * results through the configured storage backend.
 *
 * Returns a JSON string listing saved videos, or throws on request/poll
 * failure or poll timeout.
 */
export const imageToVideo = {
  name: "generateVideoi2v",
  description:
    "Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
  parameters: z.object({
    prompt: z.string().describe("Text description for the video"),
    image_path: z
      .string()
      .optional()
      .describe("Path to source image for image-to-video generation"),
    last_frame_path: z
      .string()
      .optional()
      .describe("Path to last frame image to guide ending frame (optional)"),
    aspect_ratio: z
      .string()
      .optional()
      .describe("Video aspect ratio: '16:9' or '9:16' (default: '9:16')"),
    duration_seconds: z
      .string()
      .optional()
      .describe("Video duration in seconds: '4', '6', or '8' (default: '6')"),
    resolution: z
      .string()
      .optional()
      .describe("Video resolution: '720p' or '1080p' (default: '720p')"),
    negative_prompt: z
      .string()
      .optional()
      .describe("Text describing what not to include in the video"),
    person_generation: z
      .string()
      .optional()
      .describe(
        "Controls generation of people: 'allow_adult' (default for image-to-video) or 'allow_all'"
      ),
    reference_images: z
      .array(z.string())
      .optional()
      .describe("Additional image paths for reference (max 3)"),
    output_path: z
      .string()
      .optional()
      .describe(
        "Output MP4 file path (if multiple predictions, index suffix is added)"
      ),
    project_id: z
      .string()
      .optional()
      .describe("GCP Project ID (default: mixio-pro)"),
    location_id: z
      .string()
      .optional()
      .describe("Vertex region (default: us-central1)"),
    model_id: z
      .string()
      .optional()
      .describe("Model ID (default: veo-3.1-fast-generate-preview)"),
    generate_audio: z
      .boolean()
      .optional()
      .describe(
        "Boolean flag to enable generation of audio along with the video"
      )
      .default(false),
  }),
  async execute(args: {
    prompt: string;
    image_path?: string;
    last_frame_path?: string;
    aspect_ratio?: string;
    duration_seconds?: string;
    resolution?: string;
    negative_prompt?: string;
    person_generation?: string;
    reference_images?: string[] | string;
    output_path?: string;
    project_id?: string;
    location_id?: string;
    model_id?: string;
    generate_audio?: boolean;
  }) {
    // Resolve connection/model defaults.
    const projectId = args.project_id || "mixio-pro";
    const location = args.location_id || "us-central1";
    const modelId = args.model_id || "veo-3.1-fast-generate-preview";
    const token = await fetchAccessToken();
    const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predictLongRunning`;

    // Optional first-frame image for image-to-video mode.
    let imagePart: any = undefined;
    if (args.image_path) {
      const { data, mimeType } = await fileToBase64(args.image_path);
      imagePart = {
        image: {
          bytesBase64Encoded: data,
          mimeType,
        },
      };
    }

    // Optional last-frame image to guide how the clip ends.
    let lastFramePart: any = undefined;
    if (args.last_frame_path) {
      const { data, mimeType } = await fileToBase64(args.last_frame_path);
      lastFramePart = {
        lastFrame: {
          bytesBase64Encoded: data,
          mimeType,
        },
      };
    }

    // Reference images arrive either as a real array, a JSON-encoded array
    // string, or a single path string; normalize and cap at 3.
    let referenceImages: any[] | undefined = undefined;
    if (args.reference_images) {
      let refImages: string[];
      if (typeof args.reference_images === "string") {
        if (
          args.reference_images.startsWith("[") &&
          args.reference_images.endsWith("]")
        ) {
          try {
            refImages = JSON.parse(args.reference_images);
          } catch {
            throw new Error("Invalid reference_images format");
          }
        } else {
          // A bare path string is treated as a one-element list.
          refImages = [args.reference_images];
        }
      } else if (Array.isArray(args.reference_images)) {
        refImages = args.reference_images;
      } else {
        throw new Error("Invalid reference_images: must be array or string");
      }
      if (refImages.length > 0) {
        referenceImages = await Promise.all(
          refImages.slice(0, 3).map(async (p) => {
            const { data, mimeType } = await fileToBase64(p);
            return {
              image: {
                bytesBase64Encoded: data,
                mimeType,
              },
              referenceType: "asset",
            };
          })
        );
      }
    }

    // Default person policy: stricter ('allow_adult') for image-to-video.
    const personGeneration =
      args.person_generation || (args.image_path ? "allow_adult" : "allow_all");

    const instances: any[] = [
      {
        prompt: args.prompt,
        ...(imagePart || {}),
        ...(lastFramePart || {}),
        ...(referenceImages ? { referenceImages } : {}),
      },
    ];
    const parameters: any = {
      aspectRatio: args.aspect_ratio || "9:16",
      // Explicit radix; a non-numeric string (NaN) falls back to 6.
      durationSeconds: parseInt(args.duration_seconds || "6", 10) || 6,
      resolution: args.resolution || "720p",
      negativePrompt: args.negative_prompt,
      generateAudio: args.generate_audio || false,
      personGeneration,
    };

    // Kick off the long-running prediction.
    const res = await fetch(url, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${token}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ instances, parameters }),
    });
    if (!res.ok) {
      const text = await res.text();
      throw new Error(`Vertex request failed: ${res.status} ${text}`);
    }
    const op = (await res.json()) as any;
    const name: string = op.name || op.operation || "";
    if (!name) {
      throw new Error(
        "Vertex did not return an operation name for long-running request"
      );
    }

    let current = op;
    let done = !!op.done;
    let tries = 0;
    // Poll using fetchPredictOperation as per Vertex recommendation.
    // NOTE(review): the access token is not refreshed while polling; fine
    // here since polling is capped well below typical token lifetime.
    const fetchUrl = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:fetchPredictOperation`;
    while (!done && tries < 60) {
      await wait(10000);
      const poll = await fetch(fetchUrl, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${token}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ operationName: name }),
      });
      if (!poll.ok) {
        const text = await poll.text();
        throw new Error(`Vertex operation poll failed: ${poll.status} ${text}`);
      }
      current = (await poll.json()) as any;
      done = !!current.done || !!current.response;
      tries++;
    }
    // Fix: previously an exhausted polling budget fell through and yielded a
    // misleading "no videos array present" message; fail loudly instead.
    if (!done) {
      throw new Error(
        `Vertex operation timed out after ${tries} polls (operationName=${name})`
      );
    }

    const resp = current.response || current;
    // Decode from response.videos[].bytesBase64Encoded only
    const videos: Array<{ url: string; filename: string; mimeType: string }> =
      [];
    // Persist one base64-encoded MP4; index > 0 gets a filename suffix.
    const saveVideo = async (base64: string, index: number) => {
      if (!base64) return;
      // Use provided output path or generate default with timestamp
      let filePath: string;
      if (args.output_path) {
        // User provided path - use as-is for first video, add index for subsequent
        filePath = index === 0
          ? args.output_path
          : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`);
      } else {
        // No path provided - generate timestamped default
        const defaultName = `video_output${index > 0 ? `_${index}` : ""}.mp4`;
        filePath = generateTimestampedFilename(defaultName);
      }
      const buf = Buffer.from(base64, "base64");
      const storage = getStorage();
      const url = await storage.writeFile(filePath, buf);
      videos.push({
        url,
        filename: filePath,
        mimeType: "video/mp4",
      });
    };
    if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
      for (let i = 0; i < resp.videos.length; i++) {
        const v = resp.videos[i] || {};
        if (typeof v.bytesBase64Encoded === "string") {
          await saveVideo(v.bytesBase64Encoded, i);
        }
      }
    }
    if (videos.length > 0) {
      return JSON.stringify({
        videos,
        message: "Video(s) generated successfully",
      });
    }
    // If nothing saved, return a concise summary plus head/tail snippets of JSON
    let jsonStr = "";
    try {
      jsonStr = JSON.stringify(resp);
    } catch {}
    const head150 = jsonStr ? jsonStr.slice(0, 150) : "";
    const tail50 = jsonStr
      ? jsonStr.slice(Math.max(0, jsonStr.length - 50))
      : "";
    return `Vertex operation done but no videos array present. operationName=${name}. json_head150=${head150} json_tail50=${tail50}`;
  },
};