@bashcat/ai-image-chat-mcp
Version:
MCP server for AI image generation, video generation and chat completion with support for multiple AI providers including Tongyi Wanxiang
1,331 lines (1,283 loc) • 58.8 kB
JavaScript
#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, McpError, } from "@modelcontextprotocol/sdk/types.js";
import dotenv from "dotenv";
import axios from "axios";
import * as fs from "fs";
import * as path from "path";
import * as os from "os";
import sharp from "sharp";
import { createRequire } from "module";
// ES modules have no built-in `require`; create one so package.json can be read as JSON.
const require = createRequire(import.meta.url);
// Package manifest; its `version` is reported by the MCP server and the usage guide.
const packageJson = require("../package.json");
// Load environment variables (via dotenv) before the server reads process.env.
dotenv.config();
/**
 * MCP server that exposes image generation, video generation and chat
 * completion tools backed by an OpenAI-style HTTP API and Alibaba DashScope.
 */
class AIImageChatMCPServer {
// MCP `Server` instance, created in the constructor.
server;
// API key for the chat/image API (from AI_API_KEY).
apiKey;
// Base URL of the chat/image API (from AI_API_BASE_URL, with a default).
baseUrl;
// Directory where generated images are written (from AI_IMAGE_SAVE_PATH, with a default).
saveDirectory;
// DashScope API key (from ALI_API_KEY or DASHSCOPE_API_KEY); may be empty.
dashScopeApiKey;
constructor() {
this.server = new Server({
name: "ai-image-chat-mcp-server",
version: packageJson.version,
}, {
capabilities: {
tools: {},
},
});
// 從環境變數獲取 API 配置
this.apiKey = process.env.AI_API_KEY || "";
this.baseUrl = process.env.AI_API_BASE_URL || "https://api.laozhang.ai/v1";
this.saveDirectory = process.env.AI_IMAGE_SAVE_PATH || path.join(os.homedir(), "generated_images");
// 支援 ALI_API_KEY 和 DASHSCOPE_API_KEY 兩種環境變數名稱
this.dashScopeApiKey = process.env.ALI_API_KEY || process.env.DASHSCOPE_API_KEY || "";
if (!this.apiKey) {
throw new Error("AI_API_KEY 環境變數未設定");
}
// 確保保存目錄存在
if (!fs.existsSync(this.saveDirectory)) {
fs.mkdirSync(this.saveDirectory, { recursive: true });
}
this.setupToolHandlers();
}
generateFilename(prompt, outputFormat = 'jpg') {
// 清理 prompt 作為檔名,移除特殊字符並限制長度
const cleanPrompt = prompt
.replace(/[^\w\s\u4e00-\u9fff]/g, '') // 保留字母、數字、空格和中文字符
.replace(/\s+/g, '-') // 將空格替換為連字符
.substring(0, 50); // 限制長度為50字符
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
// 確保輸出格式為小寫
const extension = outputFormat.toLowerCase() === 'jpeg' ? 'jpg' : outputFormat.toLowerCase();
return `${cleanPrompt}-${timestamp}.${extension}`;
}
formatSaveDirectoryForDisplay() {
// 如果是預設的 home 目錄路徑,顯示更友好的格式
const homeDir = os.homedir();
if (this.saveDirectory.startsWith(homeDir)) {
return this.saveDirectory.replace(homeDir, '~');
}
return this.saveDirectory;
}
formatFilePathForDisplay(filePath) {
// 將絕對路徑轉換為相對於 home 目錄的顯示格式
const homeDir = os.homedir();
if (filePath.startsWith(homeDir)) {
return filePath.replace(homeDir, '~');
}
return filePath;
}
async downloadAndSaveImage(imageUrl, filename, outputFormat = 'jpg') {
try {
const response = await axios.get(imageUrl, { responseType: 'arraybuffer' });
const filePath = path.join(this.saveDirectory, filename);
// 根據輸出格式處理圖片
const normalizedFormat = outputFormat.toLowerCase() === 'jpeg' ? 'jpg' : outputFormat.toLowerCase();
if (normalizedFormat === 'jpg' || normalizedFormat === 'jpeg') {
// 轉換為 JPG 格式
await sharp(Buffer.from(response.data))
.jpeg({ quality: 90 })
.toFile(filePath);
}
else if (normalizedFormat === 'png') {
// 保持 PNG 格式
await sharp(Buffer.from(response.data))
.png()
.toFile(filePath);
}
else if (normalizedFormat === 'webp') {
// 轉換為 WebP 格式
await sharp(Buffer.from(response.data))
.webp({ quality: 90 })
.toFile(filePath);
}
else {
// 預設轉換為 JPG
await sharp(Buffer.from(response.data))
.jpeg({ quality: 90 })
.toFile(filePath);
}
return filePath;
}
catch (error) {
throw new Error(`圖片下載或轉換失敗: ${error instanceof Error ? error.message : String(error)}`);
}
}
async saveBase64Image(base64Data, filename, outputFormat = 'jpg') {
try {
// 解析 data URI 格式:data:image/png;base64,iVBORw0KGgoAAAANSU...
const matches = base64Data.match(/^data:image\/([a-zA-Z]+);base64,(.+)$/);
if (!matches) {
throw new Error('無效的 base64 圖片格式');
}
const [, , base64String] = matches;
const buffer = Buffer.from(base64String, 'base64');
const filePath = path.join(this.saveDirectory, filename);
// 根據輸出格式處理圖片
const normalizedFormat = outputFormat.toLowerCase() === 'jpeg' ? 'jpg' : outputFormat.toLowerCase();
if (normalizedFormat === 'jpg' || normalizedFormat === 'jpeg') {
// 轉換為 JPG 格式
await sharp(buffer)
.jpeg({ quality: 90 })
.toFile(filePath);
}
else if (normalizedFormat === 'png') {
// 保持 PNG 格式
await sharp(buffer)
.png()
.toFile(filePath);
}
else if (normalizedFormat === 'webp') {
// 轉換為 WebP 格式
await sharp(buffer)
.webp({ quality: 90 })
.toFile(filePath);
}
else {
// 預設轉換為 JPG
await sharp(buffer)
.jpeg({ quality: 90 })
.toFile(filePath);
}
return filePath;
}
catch (error) {
throw new Error(`Base64 圖片保存失敗: ${error instanceof Error ? error.message : String(error)}`);
}
}
extractImageUrlFromContent(content) {
// 嘗試從回應中提取圖片 URL
const urlPatterns = [
/https?:\/\/[^\s\)]+\.(?:jpg|jpeg|png|gif|webp)/gi,
/!\[.*?\]\((https?:\/\/[^\)]+)\)/gi,
/https?:\/\/[^\s\)]+/gi
];
for (const pattern of urlPatterns) {
const matches = content.match(pattern);
if (matches && matches.length > 0) {
const url = matches[0].replace(/^\!\[.*?\]\(/, '').replace(/\)$/, '');
if (url.match(/\.(jpg|jpeg|png|gif|webp)(\?.*)?$/i)) {
return url;
}
}
}
return null;
}
extractMultipleImageUrls(content) {
// 提取所有圖片 URL
const urlPatterns = [
/https?:\/\/[^\s\)]+\.(?:jpg|jpeg|png|gif|webp)/gi,
/!\[.*?\]\((https?:\/\/[^\)]+)\)/gi,
/https?:\/\/[^\s\)]+/gi
];
const urls = [];
for (const pattern of urlPatterns) {
const matches = content.match(pattern);
if (matches) {
for (const match of matches) {
const url = match.replace(/^\!\[.*?\]\(/, '').replace(/\)$/, '');
if (url.match(/\.(jpg|jpeg|png|gif|webp)(\?.*)?$/i)) {
urls.push(url);
}
}
}
}
// 去重並返回
return [...new Set(urls)];
}
extractBase64Images(content) {
// 提取所有 base64 圖片
const base64Pattern = /data:image\/[a-zA-Z]+;base64,[A-Za-z0-9+/=]+/gi;
const matches = content.match(base64Pattern);
return matches || [];
}
async processImageContent(content, prompt, outputFormat = 'jpg') {
const imageUrls = this.extractMultipleImageUrls(content);
const base64Images = this.extractBase64Images(content);
const savedImages = [];
// 處理 base64 圖片
if (base64Images.length > 0) {
for (let i = 0; i < base64Images.length; i++) {
try {
const filename = this.generateFilename(`${prompt}-${i + 1}`, outputFormat);
const savedPath = await this.saveBase64Image(base64Images[i], filename, outputFormat);
savedImages.push(savedPath);
}
catch (error) {
console.error(`保存第 ${i + 1} 張 base64 圖片失敗:`, error);
}
}
}
return {
imageUrls,
savedImages,
hasBase64: base64Images.length > 0
};
}
// 阿里雲 DashScope 創建任務
async createDashScopeTask(prompt, negativePrompt, options) {
if (!this.dashScopeApiKey) {
throw new Error("ALI_API_KEY 或 DASHSCOPE_API_KEY 環境變數未設定");
}
const requestData = {
model: options?.model || "wanx2.1-t2i-turbo",
input: {
prompt,
negative_prompt: negativePrompt || "人物",
},
parameters: {
size: options?.size || "1024*1024",
n: options?.n || 1,
seed: options?.seed,
prompt_extend: options?.promptExtend !== false,
watermark: options?.watermark || false,
},
};
try {
const response = await axios.post("https://dashscope.aliyuncs.com/api/v1/services/aigc/text2image/image-synthesis", requestData, {
headers: {
"Content-Type": "application/json",
"Authorization": `Bearer ${this.dashScopeApiKey}`,
"X-DashScope-Async": "enable",
},
});
return response.data;
}
catch (error) {
if (axios.isAxiosError(error)) {
const axiosError = error;
const errorMessage = axiosError.response?.data?.message || axiosError.message;
throw new Error(`阿里雲 DashScope 創建任務失敗: ${errorMessage}`);
}
throw new Error(`創建任務時發生未知錯誤: ${error}`);
}
}
// 阿里雲 DashScope 查詢任務結果
async queryDashScopeTask(taskId) {
if (!this.dashScopeApiKey) {
throw new Error("ALI_API_KEY 或 DASHSCOPE_API_KEY 環境變數未設定");
}
try {
const response = await axios.get(`https://dashscope.aliyuncs.com/api/v1/tasks/${taskId}`, {
headers: {
"Authorization": `Bearer ${this.dashScopeApiKey}`,
},
});
return response.data;
}
catch (error) {
if (axios.isAxiosError(error)) {
const axiosError = error;
const errorMessage = axiosError.response?.data?.message || axiosError.message;
throw new Error(`阿里雲 DashScope 查詢任務失敗: ${errorMessage}`);
}
throw new Error(`查詢任務時發生未知錯誤: ${error}`);
}
}
// 阿里雲 DashScope 完整生圖流程
async generateImageWithDashScope(prompt, negativePrompt, options) {
// 步驟1: 創建任務
const createResponse = await this.createDashScopeTask(prompt, negativePrompt, options);
const taskId = createResponse.output.task_id;
if (!taskId) {
throw new Error("創建任務失敗,未獲取到任務ID");
}
// 步驟2: 輪詢查詢結果
const maxWaitMinutes = options?.maxWaitMinutes || 5; // 預設最多等待5分鐘
const maxAttempts = maxWaitMinutes * 6; // 每10秒查詢一次
let attempts = 0;
while (attempts < maxAttempts) {
const queryResponse = await this.queryDashScopeTask(taskId);
const taskStatus = queryResponse.output.task_status;
if (taskStatus === "SUCCEEDED") {
const results = queryResponse.output.results || [];
return results;
}
else if (taskStatus === "FAILED") {
throw new Error(`圖片生成失敗: ${queryResponse.output.message || "未知錯誤"}`);
}
else if (taskStatus === "CANCELED") {
throw new Error("任務已被取消");
}
// 等待10秒後再次查詢
await new Promise(resolve => setTimeout(resolve, 10000));
attempts++;
}
throw new Error(`圖片生成超時,等待時間超過 ${maxWaitMinutes} 分鐘`);
}
// 阿里雲 DashScope 視頻生成創建任務
async createVideoGenerationTask(imgUrl, prompt, options) {
if (!this.dashScopeApiKey) {
throw new Error("ALI_API_KEY 或 DASHSCOPE_API_KEY 環境變數未設定");
}
const requestData = {
model: options?.model || "wanx2.1-i2v-turbo",
input: {
img_url: imgUrl,
prompt: prompt,
template: options?.template,
},
parameters: {
resolution: options?.resolution || "720P",
duration: options?.duration || 5,
prompt_extend: options?.promptExtend !== false,
seed: options?.seed,
},
};
try {
const response = await axios.post("https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis", requestData, {
headers: {
"Content-Type": "application/json",
"Authorization": `Bearer ${this.dashScopeApiKey}`,
"X-DashScope-Async": "enable",
},
});
return response.data;
}
catch (error) {
if (axios.isAxiosError(error)) {
const axiosError = error;
const errorMessage = axiosError.response?.data?.message || axiosError.message;
throw new Error(`阿里雲 DashScope 視頻生成任務創建失敗: ${errorMessage}`);
}
throw new Error(`創建視頻生成任務時發生未知錯誤: ${error}`);
}
}
// 阿里雲 DashScope 視頻生成完整流程
async generateVideoWithDashScope(imgUrl, prompt, options) {
// 步驟1: 創建任務
const createResponse = await this.createVideoGenerationTask(imgUrl, prompt, options);
const taskId = createResponse.output.task_id;
if (!taskId) {
throw new Error("創建視頻生成任務失敗,未獲取到任務ID");
}
// 步驟2: 輪詢查詢結果
const maxWaitMinutes = options?.maxWaitMinutes || 15; // 預設最多等待15分鐘(視頻生成較慢)
const maxAttempts = maxWaitMinutes * 6; // 每10秒查詢一次
let attempts = 0;
while (attempts < maxAttempts) {
const queryResponse = await this.queryDashScopeTask(taskId);
const taskStatus = queryResponse.output.task_status;
if (taskStatus === "SUCCEEDED") {
const videoUrl = queryResponse.output.video_url;
if (!videoUrl) {
throw new Error("任務完成但未獲取到視頻URL");
}
return videoUrl;
}
else if (taskStatus === "FAILED") {
throw new Error(`視頻生成失敗: ${queryResponse.output.message || "未知錯誤"}`);
}
else if (taskStatus === "CANCELED") {
throw new Error("視頻生成任務已被取消");
}
// 等待10秒後再次查詢
await new Promise(resolve => setTimeout(resolve, 10000));
attempts++;
}
throw new Error(`視頻生成超時,等待時間超過 ${maxWaitMinutes} 分鐘`);
}
setupToolHandlers() {
// 列出可用工具
this.server.setRequestHandler(ListToolsRequestSchema, async () => {
return {
tools: [
{
name: "generate_image",
description: "統一圖像生成工具,支援通義萬相和老張API多種模型",
inputSchema: {
type: "object",
properties: {
prompt: {
type: "string",
description: "圖像生成的提示詞",
},
model: {
type: "string",
description: "選擇圖像生成模型",
enum: [
// 通義萬相模型
"wanx2.1-t2i-turbo",
"wanx2.1-t2i-plus",
"wanx2.0-t2i-turbo",
// 老張API模型
"sora_image",
"gpt-4o-image",
"gemini-2.5-flash-image-preview"
],
default: "gemini-2.5-flash-image-preview",
},
negative_prompt: {
type: "string",
description: "反向提示詞,僅通義萬相模型支援(可選,預設為「人物」)",
default: "人物",
},
size: {
type: "string",
description: "輸出圖像的分辨率,僅通義萬相模型支援(可選,預設為 1024*1024)",
default: "1024*1024",
},
n: {
type: "number",
description: "生成圖片的數量,僅通義萬相模型支援(可選,範圍 1-4,預設為 1)",
minimum: 1,
maximum: 4,
default: 1,
},
seed: {
type: "number",
description: "隨機數種子,僅通義萬相模型支援(可選)",
minimum: 0,
maximum: 2147483647,
},
prompt_extend: {
type: "boolean",
description: "是否開啟 prompt 智能改寫,僅通義萬相模型支援(可選,預設為 true)",
default: true,
},
watermark: {
type: "boolean",
description: "是否添加水印標識,僅通義萬相模型支援(可選,預設為 false)",
default: false,
},
output_format: {
type: "string",
description: "輸出圖片格式(可選,預設為 jpg)",
enum: ["jpg", "jpeg", "png", "webp"],
default: "jpg",
},
system_prompt: {
type: "string",
description: "系統提示詞,僅老張API模型支援(可選)",
},
aspect_ratio: {
type: "string",
description: "圖片比例,僅老張API模型支援(可選),格式如:3:2, 16:9, 1:1",
},
max_wait_minutes: {
type: "number",
description: "最大等待時間(分鐘,可選,通義萬相預設5分鐘,老張API即時回應)",
minimum: 1,
maximum: 10,
default: 5,
},
},
required: ["prompt"],
},
},
{
name: "tongyi_wanxiang_generate_video",
description: "使用通義萬相圖生視頻完整流程(創建任務 + 等待完成 + 返回視頻URL)",
inputSchema: {
type: "object",
properties: {
img_url: {
type: "string",
description: "首幀圖像的 URL(必需,需為公網可訪問地址)",
},
prompt: {
type: "string",
description: "文本提示詞,支持中英文,長度不超過800字符(可選)",
},
model: {
type: "string",
description: "模型名稱 - turbo生成快(3-5分鐘),plus品質高(7-10分鐘)",
enum: ["wanx2.1-i2v-turbo", "wanx2.1-i2v-plus"],
default: "wanx2.1-i2v-turbo",
},
template: {
type: "string",
description: "視頻特效模板(可選):squish(解壓捏捏)、flying(魔法懸浮)、carousel(時光木馬)",
enum: ["squish", "flying", "carousel"],
},
resolution: {
type: "string",
description: "視頻分辨率檔位",
enum: ["480P", "720P"],
default: "720P",
},
duration: {
type: "number",
description: "視頻時長(秒)- turbo模型支持3-5秒,plus模型固定5秒",
minimum: 3,
maximum: 5,
default: 5,
},
prompt_extend: {
type: "boolean",
description: "是否開啟 prompt 智能改寫",
default: true,
},
seed: {
type: "number",
description: "隨機數種子",
minimum: 0,
maximum: 2147483647,
},
max_wait_minutes: {
type: "number",
description: "最大等待時間(分鐘,預設15分鐘)",
minimum: 5,
maximum: 30,
default: 15,
},
},
required: ["img_url"],
},
},
{
name: "chat_completion",
description: "使用 AI 進行對話",
inputSchema: {
type: "object",
properties: {
message: {
type: "string",
description: "用戶訊息",
},
system_prompt: {
type: "string",
description: "系統提示詞(可選,如未提供將使用預設值)",
},
model: {
type: "string",
description: "使用的模型名稱",
default: "gpt-4",
},
},
required: ["message"],
},
},
{
name: "get_usage_guide",
description: "獲取工具使用指南,包含所有可用模型的詳細說明",
inputSchema: {
type: "object",
properties: {},
required: [],
},
},
],
};
});
// 處理工具調用
this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
const { name, arguments: args } = request.params;
try {
switch (name) {
case "generate_image":
return await this.handleUnifiedImageGeneration(args);
case "tongyi_wanxiang_generate_video":
return await this.handleTongyiWanxiangGenerateVideo(args);
case "chat_completion":
return await this.handleChatCompletion(args);
case "get_usage_guide":
return await this.handleUsageGuide(args);
// 保持向後兼容
case "tongyi_wanxiang_generate_image":
return await this.handleTongyiWanxiangGenerateImage(args);
case "tongyi_wanxiang_create_task":
case "dashscope_create_task":
return await this.handleTongyiWanxiangCreateTask(args);
case "tongyi_wanxiang_query_task":
case "dashscope_query_task":
return await this.handleTongyiWanxiangQueryTask(args);
case "dashscope_generate_image":
return await this.handleTongyiWanxiangGenerateImage(args);
case "tongyi_wanxiang_create_video_task":
return await this.handleTongyiWanxiangCreateVideoTask(args);
default:
throw new McpError(ErrorCode.MethodNotFound, `未知的工具: ${name}`);
}
}
catch (error) {
if (error instanceof McpError) {
throw error;
}
throw new McpError(ErrorCode.InternalError, `工具執行錯誤: ${error instanceof Error ? error.message : String(error)}`);
}
});
}
async handleImageGeneration(args) {
const { prompt, system_prompt, model = "gpt-4o-image", output_format = "jpg", n = 1, aspect_ratio } = args;
if (!prompt) {
throw new McpError(ErrorCode.InvalidParams, "需要提供 prompt 參數");
}
// 模型資訊
const modelInfo = {
'sora_image': { name: 'Sora圖像生成', provider: '老張API', feature: '基於Sora技術的圖像生成,$0.01/次' },
'gpt-4o-image': { name: 'GPT-4o圖像生成', provider: '老張API', feature: 'GPT-4o視覺模型,$0.01/次' },
'gemini-2.5-flash-image-preview': { name: 'Gemini 2.5 Flash圖像生成', provider: '老張API', feature: 'Google Gemini 2.5 Flash預覽版,高速生成,$0.01/次' }
};
const modelDesc = modelInfo[model];
// 根據老張API文檔格式,構建正確的請求
let finalPrompt = prompt;
// 如果有比例要求,添加到提示詞末尾(如:【3:2】)
if (aspect_ratio) {
finalPrompt = `${prompt}【${aspect_ratio}】`;
}
const messages = [
{
role: "user",
content: [
{
type: "text",
text: finalPrompt
}
]
}
];
const requestData = {
model,
n: Math.min(Math.max(n, 1), 4), // 限制在1-4之間
messages
};
try {
const response = await axios.post(`${this.baseUrl}/chat/completions`, requestData, {
headers: {
"Content-Type": "application/json",
"Authorization": `Bearer ${this.apiKey}`,
},
});
const result = response.data;
const generatedContent = result.choices?.[0]?.message?.content || "未生成內容";
// 使用新的圖片處理邏輯,同時支援 URL 和 base64
const imageData = await this.processImageContent(generatedContent, prompt, output_format);
let responseText = `${modelDesc.name}圖像生成完成!\n\n`;
responseText += `🎨 使用模型: ${modelDesc.name}\n`;
responseText += `💰 費用: ${modelDesc.feature}\n`;
responseText += `📝 提示詞: ${finalPrompt}\n`;
responseText += `🖼️ 生成數量: ${n}張\n`;
if (system_prompt) {
responseText += `🔧 系統提示: ${system_prompt}\n`;
}
// 優先顯示 base64 圖片的保存結果
if (imageData.savedImages.length > 0) {
responseText += `\n💾 圖片已保存到 ${this.formatSaveDirectoryForDisplay()}:\n`;
imageData.savedImages.forEach((filePath, index) => {
const displayPath = this.formatFilePathForDisplay(filePath);
const filename = path.basename(filePath);
responseText += `${index + 1}. ${filename}\n 完整路徑: ${displayPath}\n`;
});
}
// 如果有 URL 圖片,也顯示
if (imageData.imageUrls.length > 0) {
responseText += `\n🔗 圖片URLs:\n`;
imageData.imageUrls.forEach((url, index) => {
responseText += `${index + 1}. ${url}\n`;
});
}
// 如果沒有找到任何圖片(URL 或 base64),顯示原始回應
if (!imageData.hasBase64 && imageData.imageUrls.length === 0) {
responseText += `\n📄 API回應內容:\n${generatedContent}`;
}
return {
content: [
{
type: "text",
text: responseText,
},
],
};
}
catch (error) {
if (axios.isAxiosError(error)) {
const axiosError = error;
const errorMessage = axiosError.response?.data?.error?.message || axiosError.message;
// 根據文檔建議,如果sora_image失敗,建議切換到其他模型
let suggestionText = "";
if (model === "sora_image") {
suggestionText = "\n💡 提示:如果sora_image持續失敗,建議切換到gpt-4o-image或gemini-2.5-flash-image-preview模型重試";
}
throw new McpError(ErrorCode.InternalError, `老張API請求失敗: ${errorMessage}${suggestionText}`);
}
throw new McpError(ErrorCode.InternalError, `未知錯誤: ${error instanceof Error ? error.message : String(error)}`);
}
}
async handleTongyiWanxiangCreateTask(args) {
const { prompt, model, negative_prompt, size, n, seed, prompt_extend, watermark } = args;
if (!prompt) {
throw new McpError(ErrorCode.InvalidParams, "需要提供 prompt 參數");
}
try {
const response = await this.createDashScopeTask(prompt, negative_prompt, {
model,
size,
n,
seed,
promptExtend: prompt_extend,
watermark,
});
return {
content: [
{
type: "text",
text: `通義萬相文生圖任務創建成功!\n\n任務ID: ${response.output.task_id}\n任務狀態: ${response.output.task_status}\n使用模型: ${model || 'wanx2.1-t2i-turbo'}\n請求ID: ${response.request_id}\n\n請使用 tongyi_wanxiang_query_task 工具查詢任務結果。`,
},
],
};
}
catch (error) {
throw new McpError(ErrorCode.InternalError, `創建任務失敗: ${error instanceof Error ? error.message : String(error)}`);
}
}
async handleTongyiWanxiangQueryTask(args) {
const { task_id } = args;
if (!task_id) {
throw new McpError(ErrorCode.InvalidParams, "需要提供 task_id 參數");
}
try {
const response = await this.queryDashScopeTask(task_id);
const { output } = response;
let responseText = `通義萬相文生圖任務查詢結果:\n\n`;
responseText += `任務ID: ${output.task_id}\n`;
responseText += `任務狀態: ${output.task_status}\n`;
responseText += `請求ID: ${response.request_id}\n`;
if (output.submit_time) {
responseText += `提交時間: ${output.submit_time}\n`;
}
if (output.scheduled_time) {
responseText += `開始時間: ${output.scheduled_time}\n`;
}
if (output.end_time) {
responseText += `完成時間: ${output.end_time}\n`;
}
if (output.task_status === "SUCCEEDED" && output.results) {
responseText += `\n✅ 任務完成!生成了 ${output.results.length} 張圖片:\n`;
output.results.forEach((result, index) => {
responseText += `\n圖片 ${index + 1}:\n`;
responseText += `原始提示詞: ${result.orig_prompt}\n`;
if (result.actual_prompt) {
responseText += `優化後提示詞: ${result.actual_prompt}\n`;
}
responseText += `圖片URL: ${result.url}\n`;
});
if (output.task_metrics) {
responseText += `\n📊 任務統計:\n`;
responseText += `總計: ${output.task_metrics.TOTAL}\n`;
responseText += `成功: ${output.task_metrics.SUCCEEDED}\n`;
responseText += `失敗: ${output.task_metrics.FAILED}\n`;
}
}
else if (output.task_status === "FAILED") {
responseText += `\n❌ 任務失敗\n`;
if (output.message) {
responseText += `錯誤訊息: ${output.message}\n`;
}
}
else if (output.task_status === "PENDING") {
responseText += `\n⏳ 任務排隊中,請稍後再次查詢\n`;
}
else if (output.task_status === "RUNNING") {
responseText += `\n🔄 任務處理中,請稍後再次查詢\n`;
}
return {
content: [
{
type: "text",
text: responseText,
},
],
};
}
catch (error) {
throw new McpError(ErrorCode.InternalError, `查詢任務失敗: ${error instanceof Error ? error.message : String(error)}`);
}
}
async handleTongyiWanxiangGenerateImage(args) {
const { prompt, model, negative_prompt, size, n, seed, prompt_extend, watermark, output_format, max_wait_minutes } = args;
if (!prompt) {
throw new McpError(ErrorCode.InvalidParams, "需要提供 prompt 參數");
}
// 模型資訊
const modelInfo = {
'wanx2.1-t2i-turbo': { name: '通義萬相2.1-Turbo', price: '0.14元/張', feature: '生成速度更快' },
'wanx2.1-t2i-plus': { name: '通義萬相2.1-Plus', price: '0.20元/張', feature: '圖像細節更豐富' },
'wanx2.0-t2i-turbo': { name: '通義萬相2.0-Turbo', price: '0.04元/張', feature: '質感人像與創意設計' }
};
const selectedModel = model || 'wanx2.1-t2i-turbo';
const modelDesc = modelInfo[selectedModel];
try {
const results = await this.generateImageWithDashScope(prompt, negative_prompt, {
model: selectedModel,
size,
n,
seed,
promptExtend: prompt_extend,
watermark,
outputFormat: output_format,
maxWaitMinutes: max_wait_minutes,
});
let responseText = `通義萬相文生圖像生成完成!\n\n`;
responseText += `🎨 使用模型: ${modelDesc.name} (${modelDesc.feature})\n`;
responseText += `💰 計費: ${modelDesc.price}\n`;
responseText += `✅ 成功生成 ${results.length} 張圖片\n\n`;
results.forEach((result, index) => {
responseText += `圖片 ${index + 1}:\n`;
responseText += `原始提示詞: ${result.orig_prompt}\n`;
if (result.actual_prompt) {
responseText += `優化後提示詞: ${result.actual_prompt}\n`;
}
responseText += `圖片URL: ${result.url}\n\n`;
});
return {
content: [
{
type: "text",
text: responseText,
},
],
};
}
catch (error) {
throw new McpError(ErrorCode.InternalError, `生成圖片失敗: ${error instanceof Error ? error.message : String(error)}`);
}
}
async handleTongyiWanxiangCreateVideoTask(args) {
const { img_url, prompt, model, template, resolution, duration, prompt_extend, seed } = args;
if (!img_url) {
throw new McpError(ErrorCode.InvalidParams, "需要提供 img_url 參數");
}
try {
const response = await this.createVideoGenerationTask(img_url, prompt, {
model,
template,
resolution,
duration,
promptExtend: prompt_extend,
seed,
});
return {
content: [
{
type: "text",
text: `通義萬相圖生視頻任務創建成功!\n\n任務ID: ${response.output.task_id}\n任務狀態: ${response.output.task_status}\n使用模型: ${model || 'wanx2.1-i2v-turbo'}\n首幀圖像: ${img_url}\n請求ID: ${response.request_id}\n\n請使用 tongyi_wanxiang_query_task 工具查詢任務結果。`,
},
],
};
}
catch (error) {
throw new McpError(ErrorCode.InternalError, `創建視頻生成任務失敗: ${error instanceof Error ? error.message : String(error)}`);
}
}
async handleTongyiWanxiangGenerateVideo(args) {
const { img_url, prompt, model, template, resolution, duration, prompt_extend, seed, max_wait_minutes } = args;
if (!img_url) {
throw new McpError(ErrorCode.InvalidParams, "需要提供 img_url 參數");
}
// 模型資訊
const modelInfo = {
'wanx2.1-i2v-turbo': { name: '通義萬相2.1圖生視頻-Turbo', time: '3-5分鐘', feature: '生成速度快' },
'wanx2.1-i2v-plus': { name: '通義萬相2.1圖生視頻-Plus', time: '7-10分鐘', feature: '視頻品質高' }
};
const selectedModel = model || 'wanx2.1-i2v-turbo';
const modelDesc = modelInfo[selectedModel];
try {
const videoUrl = await this.generateVideoWithDashScope(img_url, prompt, {
model: selectedModel,
template,
resolution,
duration,
promptExtend: prompt_extend,
seed,
maxWaitMinutes: max_wait_minutes,
});
let responseText = `通義萬相圖生視頻生成完成!\n\n`;
responseText += `🎬 使用模型: ${modelDesc.name} (${modelDesc.feature})\n`;
responseText += `⏱️ 預計生成時間: ${modelDesc.time}\n`;
responseText += `🖼️ 首幀圖像: ${img_url}\n`;
responseText += `📐 分辨率: ${resolution || '720P'}\n`;
responseText += `⏰ 視頻時長: ${duration || 5}秒\n`;
if (prompt) {
responseText += `💭 提示詞: ${prompt}\n`;
}
if (template) {
responseText += `✨ 特效模板: ${template}\n`;
}
responseText += `\n✅ 視頻生成成功!\n`;
responseText += `🔗 視頻URL: ${videoUrl}\n`;
responseText += `\n⚠️ 視頻URL有效期24小時,請及時保存`;
return {
content: [
{
type: "text",
text: responseText,
},
],
};
}
catch (error) {
throw new McpError(ErrorCode.InternalError, `視頻生成失敗: ${error instanceof Error ? error.message : String(error)}`);
}
}
async handleChatCompletion(args) {
const { message, system_prompt = "You are a helpful assistant.", model = "gpt-4" } = args;
if (!message) {
throw new McpError(ErrorCode.InvalidParams, "需要提供 message 參數");
}
const messages = [
{ role: "system", content: system_prompt },
{ role: "user", content: message },
];
const requestData = {
model,
messages,
};
try {
const response = await axios.post(`${this.baseUrl}/chat/completions`, requestData, {
headers: {
"Content-Type": "application/json",
"Authorization": `Bearer ${this.apiKey}`,
},
});
const result = response.data;
const assistantMessage = result.choices[0]?.message?.content || "未收到回覆";
return {
content: [
{
type: "text",
text: `助手回覆:\n\n${assistantMessage}`,
},
],
};
}
catch (error) {
if (axios.isAxiosError(error)) {
const axiosError = error;
const errorMessage = axiosError.response?.data?.error?.message || axiosError.message;
throw new McpError(ErrorCode.InternalError, `API 請求失敗: ${errorMessage}`);
}
throw new McpError(ErrorCode.InternalError, `未知錯誤: ${error instanceof Error ? error.message : String(error)}`);
}
}
async handleUnifiedImageGeneration(args) {
const { prompt, model = "gemini-2.5-flash-image-preview", negative_prompt, size, n, seed, prompt_extend, watermark, output_format = "jpg", system_prompt, aspect_ratio, max_wait_minutes } = args;
if (!prompt) {
throw new McpError(ErrorCode.InvalidParams, "需要提供 prompt 參數");
}
// 判斷是通義萬相模型還是老張API模型
const tongyiModels = ["wanx2.1-t2i-turbo", "wanx2.1-t2i-plus", "wanx2.0-t2i-turbo"];
const laozhangModels = ["sora_image", "gpt-4o-image", "gemini-2.5-flash-image-preview"];
if (tongyiModels.includes(model)) {
// 使用通義萬相API
return await this.handleTongyiWanxiangGenerateImage({
prompt,
model,
negative_prompt,
size,
n,
seed,
prompt_extend,
watermark,
output_format,
max_wait_minutes
});
}
else if (laozhangModels.includes(model)) {
// 使用老張API
return await this.handleImageGeneration({
prompt,
model,
system_prompt,
output_format,
n,
aspect_ratio
});
}
else {
throw new McpError(ErrorCode.InvalidParams, `不支援的模型: ${model}`);
}
}
async handleUsageGuide(args) {
const guideText = `
# AI Image Chat MCP 工具使用指南
**版本**: v${packageJson.version}
**最後更新**: ${new Date().toLocaleDateString('zh-TW')}
**支援模型**: 6 種圖像生成模型 + 2 種視頻生成模型
## 🎨 統一圖像生成工具
### generate_image
**功能**: 統一圖像生成工具,支援通義萬相和老張API多種模型
**可用模型**:
### 通義萬相模型(阿里雲DashScope)
1. **wanx2.1-t2i-turbo**
- 提供商: 阿里雲
- 價格: 0.14元/張
- 特點: 生成速度更快
- 生成時間: 1-3分鐘
- 適合: 快速原型和測試
2. **wanx2.1-t2i-plus**
- 提供商: 阿里雲
- 價格: 0.20元/張
- 特點: 圖像細節更豐富
- 生成時間: 1-3分鐘
- 適合: 高品質作品和商業用途
3. **wanx2.0-t2i-turbo**
- 提供商: 阿里雲
- 價格: 0.04元/張
- 特點: 質感人像與創意設計
- 生成時間: 1-3分鐘
- 適合: 成本控制和批量生成
### 老張API模型(即時回應,$0.01/次)
4. **gemini-2.5-flash-image-preview** ⭐ (預設)
- 提供商: 老張API
- 特點: Google Gemini 2.5 Flash預覽版,高速生成
- 生成時間: 即時回應
- 適合: 快速原型設計、高效批量生成、首選模型
5. **sora_image**
- 提供商: 老張API
- 特點: 基於Sora技術的圖像生成
- 生成時間: 即時回應
- 適合: 快速圖像生成、創意探索
6. **gpt-4o-image**
- 提供商: 老張API
- 特點: GPT-4o視覺模型
- 生成時間: 即時回應
- 適合: 智能圖像理解和生成
**主要參數**:
- \`prompt\`: 圖像生成提示詞 (必需)
- \`model\`: 模型選擇 (可選,預設: "gemini-2.5-flash-image-preview")
**通義萬相模型專用參數**:
- \`negative_prompt\`: 反向提示詞 (可選,預設: "人物")
- \`size\`: 圖像尺寸 (可選,預設: "1024*1024")
- \`n\`: 生成數量 (可選,範圍: 1-4,預設: 1)
- \`seed\`: 隨機種子 (可選)
- \`prompt_extend\`: 智能改寫 (可選,預設: true)
- \`watermark\`: 添加水印 (可選,預設: false)
- \`max_wait_minutes\`: 最大等待時間 (可選,預設: 5分鐘)
**老張API模型專用參數**:
- \`system_prompt\`: 系統提示詞 (可選)
- \`output_format\`: 圖片格式 (可選,預設: "jpg")
---
## 🎬 視頻生成工具
### tongyi_wanxiang_generate_video
**功能**: 使用通義萬相圖生視頻完整流程(創建任務 + 等待完成 + 返回視頻URL)
**可用模型**:
1. **wanx2.1-i2v-turbo** (預設)
- 生成時間: 3-5分鐘
- 特點: 生成速度快
- 支援時長: 3-5秒
- 支援分辨率: 480P, 720P
2. **wanx2.1-i2v-plus**
- 生成時間: 7-10分鐘
- 特點: 視頻品質高
- 支援時長: 固定5秒
- 支援分辨率: 僅720P
**主要參數**:
- \`img_url\`: 首幀圖像URL (必需,需為公網可訪問地址)
- \`prompt\`: 文本提示詞 (可選,長度不超過800字符)
- \`model\`: 模型選擇 (可選)
- \`template\`: 視頻特效模板 (可選)
- \`resolution\`: 分辨率檔位 (可選,預設: "720P")
- \`duration\`: 視頻時長 (可選,預設: 5秒)
- \`prompt_extend\`: 智能改寫 (可選,預設: true)
- \`seed\`: 隨機種子 (可選)
- \`max_wait_minutes\`: 最大等待時間 (可選,預設: 15分鐘)
**視頻特效模板**:
- \`squish\`: 解壓捏捏效果
- \`flying\`: 魔法懸浮效果
- \`carousel\`: 時光木馬效果
**圖像要求**:
- 格式: JPEG、JPG、PNG、BMP、WEBP
- 尺寸: 360-2000像素(寬度和高度)
- 檔案大小: 不超過10MB
- URL: 必須為公網可訪問地址
---
## 💬 AI 對話工具
### chat_completion
**功能**: 使用 AI 進行對話
**主要參數**:
- \`message\`: 用戶訊息 (必需)
- \`system_prompt\`: 系統提示詞 (可選)
- \`model\`: 模型名稱 (可選,預設: "gpt-4")
---
## 📋 使用範例
### 📋 圖像生成模型詳細調用範例
#### 1. gemini-2.5-flash-image-preview (預設) - 高速生成
**特點**: Gemini 2.5 Flash預覽版 | **價格**: $0.01/次 | **時間**: 即時回應
\`\`\`json
{
"name": "generate_image",
"arguments": {
"prompt": "一隻可愛的橘貓在陽光下睡覺",
"model": "gemini-2.5-flash-image-preview",
"aspect_ratio": "16:9",
"n": 2
}
}
\`\`\`
#### 2. wanx2.1-t2i-turbo - 速度優先
**特點**: 生成速度快 | **價格**: 0.14元/張 | **時間**: 1-3分鐘
\`\`\`json
{
"name": "generate_image",
"arguments": {
"prompt": "一隻可愛的橘貓在陽光下睡覺",
"model": "wanx2.1-t2i-turbo",
"size": "1024*1024",
"n": 1,
"negative_prompt": "人物,文字,低質量",
"prompt_extend": true,
"watermark": false,
"max_wait_minutes": 5
}
}
\`\`\`
#### 2. wanx2.1-t2i-turbo - 速度優先
**特點**: 生成速度快 | **價格**: 0.14元/張 | **時間**: 1-3分鐘
\`\`\`json
{
"name": "generate_image",
"arguments": {
"prompt": "一隻可愛的橘貓在陽光下睡覺",
"model": "wanx2.1-t2i-turbo",
"size": "1024*1024",
"n": 1,
"negative_prompt": "人物,文字,低質量",
"prompt_extend": true,
"watermark": false,
"max_wait_minutes": 5
}
}
\`\`\`
#### 3. wanx2.1-t2i-plus - 品質優先
**特點**: 圖像細節豐富 | **價格**: 0.20元/張 | **時間**: 1-3分鐘
\`\`\`json
{
"name": "generate_image",
"arguments": {
"prompt": "精美的日式庭院,櫻花飄落,寧靜祥和",
"model": "wanx2.1-t2i-plus",
"size": "1024*1024",
"n": 2,
"negative_prompt": "人物,文字,模糊",
"seed": 12345,
"prompt_extend": true,
"output_format": "jpg"
}
}
\`\`\`
#### 4. wanx2.0-t2i-turbo - 性價比首選
**特點**: 質感人像與創意設計 | **價格**: 0.04元/張 | **時間**: 1-3分鐘
\`\`\`json
{
"name": "generate_image",
"arguments": {
"prompt": "時尚的年輕女性肖像,現代都市背景",
"model": "wanx2.0-t2i-turbo",
"size": "1024*1024",
"n": 4,
"negative_prompt": "醜陋,變形,低分辨率",
"max_wait_minutes": 3
}
}
\`\`\`
#### 5. sora_image - 即時生成
**特點**: 基於Sora技術 | **價格**: $0.01/次 | **時間**: 即時回應
\`\`\`json
{
"name": "generate_image",
"arguments": {
"prompt": "夢幻般的未來城市,科技感十足,霓虹燈光",
"model": "sora_image",
"n": 4,
"aspect_ratio": "16:9"
}
}
\`\`\`
#### 6. gpt-4o-image - 智能生成
**特點**: GPT-4o視覺模型 | **價格**: $0.01/次 | **時間**: 即時回應
\`\`\`json
{
"name": "generate_image",
"arguments": {
"prompt": "可愛的卡通動物們在森林裡開派對",
"model": "gpt-4o-image",
"n": 2,
"aspect_ratio": "3:2",
"system_prompt": "Create a colorful, family-friendly cartoon style image"
}
}
\`\`\`
**特點**: Gemini 2.5 Flash預覽版 | **價格**: $0.01/次 | **時間**: 即時回應
\`\`\`json
{
"name": "generate_image",
"arguments": {
"prompt": "賽博朋克風格的東京街道,雨夜霓虹",
"model": "gemini-2.5-flash-image-preview",
"n": 3,
"aspect_ratio": "16:9"
}
}
\`\`\`
#### 6. 老張API比例控制範例
**展示不同比例效果**
\`\`\`json
{
"name": "generate_image",
"arguments": {
"prompt": "一隻貓咪坐在窗台上看風景",
"model": "sora_image",
"aspect_ratio": "5:3",
"n": 1
}
}
\`\`\`
### 📋 詳細參數說明
#### 🔧 通用參數 (所有模型)
- \`**prompt**\` (必需): 圖像生成的描述詞
- \`**model**\` (可選): 選擇生成模型,預設 "gemini-2.5-flash-image-preview"
- \`**output_format**\` (可選): 圖片格式 ["jpg", "jpeg", "png", "webp"],預設 "jpg"
#### ⚙️ 通義萬相專用參數 (wanx2.x 模型)
- \`**negative_prompt**\` (可選): 反向提示詞,描述不希望出現的內容,預設 "人物"
- \`**size**\` (可選): 圖像尺寸,預設 "1024*1024"
- 支援: "512*512", "768*768", "1024*1024", "1280*720", "720*1280"
- \`**n**\` (可選): 生成圖片數量,範圍 1-4,預設 1
- \`**seed**\` (可選): 隨機種子 (0-2147483647),用於重現相同結果
- \`**prompt_extend**\` (可選): 智能改寫提示詞,預設 true
- \`**watermark**\` (可選): 添加水印標識,預設 false
- \`**max_wait_minutes**\` (可選): 最大等待時間 (1-10分鐘),預設 5分鐘
#### 🤖 老張API專用參數 (sora_image, gpt-4o-image, gemini-2.5-flash-image-preview)
- \`**system_prompt**\` (可選): 系統提示詞,用於指導AI生成風格
- \`**aspect_ratio**\` (可選): 圖片比例,格式如 "3:2", "16:9", "1:1" (會添加到提示詞末尾)
- \`**n**\` (可選): 生成圖片數量,範圍 1-4,預設 1 (注意:老張API按次計費,每次$0.01)
- \`**max_wait_minutes**\` (可選): 最大等待時間,預設 5分鐘 (通常即時回應)
### 💰 模型價格對比表
| 模型 | 提供商 | 價格 | 生成時間 | 適用場景 |
|------|---------|------|----------|----------|
| **wanx2.0-t2i-turbo** | 阿里雲 | **0.04元/張** | 1-3分鐘 | 💰 批量生成、成本控制 |
| **wanx2.1-t2i-turbo** | 阿里雲 | 0.14元/張 | 1-3分鐘 | ⚡ 快速原型、測試 |
| **wanx2.1-t2i-plus** | 阿里雲 | 0.20元/張 | 1-3分鐘 | 🎨 高品質、商業用途 |
| **sora_image** | 老張API | **$0.01/次** | 即時 | 🚀 即時生成、創意探索 |
| **gpt-4o-image** | 老張API | **$0.01/次** | 即時 | 🤖 智能理解、複雜場景 |
| **gemini-2.5-flash-image-preview** | 老張API | **$0.01/次** | 即時 | ⚡ 高速生成、高效處理 |
### 💡 老張API使用說明
- **計費方式**: 按次計費,每次調用 $0.01,不論生成幾張圖片
- **比例控制**: 在提示詞末尾自動添加【比例】格式,如【3:2】
- **重試機制**: 如果 sora_image 失敗,建議切換到 gpt-4o-image 或 gemini-2.5-flash-image-preview
- **數量控制**: 使用 n 參數控制生成數量 (1-4張)
- **格式特點**: 基於 ChatGPT PLUS 用戶的生圖請求模擬
### 🎯 模型選擇建議
**追求速度**: \`gemini-2.5-flash-image-preview\`, \`sora_image\`, \`gpt-4o-image\` (即時回應)
**注重成本**: \`wanx2.0-t2i-turbo\` (最便宜,0.04元/張)
**平衡選擇**: \`wanx2.1-t2i-turbo\` (速度與品質平衡)
**追求品質**: \`wanx2.1-t2i-plus\` (最高品質)
**創意實驗**: \`sora_image\` (Sora技術,獨特風格)
### 視頻生成範例
\`\`\`json
{
"name": "tongyi_wanxiang_generate_video",
"arguments": {
"img_url": "https://example.com/cat.jpg",
"prompt": "小貓在草地上慢慢伸懶腰",
"model": "wanx2.1-i2v-turbo",
"duration": 4,
"resolution": "720P"
}
}
\`\`\`
### 特效視頻範例
\`\`\`json