UNPKG

@bashcat/ai-image-chat-mcp

Version:

MCP server for AI image generation, video generation and chat completion with support for multiple AI providers including Tongyi Wanxiang

bashcat/ai-image-chat-mcp

1,331 lines (1,283 loc) • 58.8 kB

JavaScript

#!/usr/bin/env node import { Server } from "@modelcontextprotocol/sdk/server/index.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, McpError, } from "@modelcontextprotocol/sdk/types.js"; import dotenv from "dotenv"; import axios from "axios"; import * as fs from "fs"; import * as path from "path"; import * as os from "os"; import sharp from "sharp"; import { createRequire } from "module"; const require = createRequire(import.meta.url); const packageJson = require("../package.json"); // 載入環境變數 dotenv.config(); class AIImageChatMCPServer { server; apiKey; baseUrl; saveDirectory; dashScopeApiKey; constructor() { this.server = new Server({ name: "ai-image-chat-mcp-server", version: packageJson.version, }, { capabilities: { tools: {}, }, }); // 從環境變數獲取 API 配置 this.apiKey = process.env.AI_API_KEY || ""; this.baseUrl = process.env.AI_API_BASE_URL || "https://api.laozhang.ai/v1"; this.saveDirectory = process.env.AI_IMAGE_SAVE_PATH || path.join(os.homedir(), "generated_images"); // 支援 ALI_API_KEY 和 DASHSCOPE_API_KEY 兩種環境變數名稱 this.dashScopeApiKey = process.env.ALI_API_KEY || process.env.DASHSCOPE_API_KEY || ""; if (!this.apiKey) { throw new Error("AI_API_KEY 環境變數未設定"); } // 確保保存目錄存在 if (!fs.existsSync(this.saveDirectory)) { fs.mkdirSync(this.saveDirectory, { recursive: true }); } this.setupToolHandlers(); } generateFilename(prompt, outputFormat = 'jpg') { // 清理 prompt 作為檔名，移除特殊字符並限制長度 const cleanPrompt = prompt .replace(/[^\w\s\u4e00-\u9fff]/g, '') // 保留字母、數字、空格和中文字符 .replace(/\s+/g, '-') // 將空格替換為連字符 .substring(0, 50); // 限制長度為50字符 const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); // 確保輸出格式為小寫 const extension = outputFormat.toLowerCase() === 'jpeg' ? 'jpg' : outputFormat.toLowerCase(); return `${cleanPrompt}-${timestamp}.${extension}`; } formatSaveDirectoryForDisplay() { // 如果是預設的 home 目錄路徑，顯示更友好的格式 const homeDir = os.homedir(); if (this.saveDirectory.startsWith(homeDir)) { return this.saveDirectory.replace(homeDir, '~'); } return this.saveDirectory; } formatFilePathForDisplay(filePath) { // 將絕對路徑轉換為相對於 home 目錄的顯示格式 const homeDir = os.homedir(); if (filePath.startsWith(homeDir)) { return filePath.replace(homeDir, '~'); } return filePath; } async downloadAndSaveImage(imageUrl, filename, outputFormat = 'jpg') { try { const response = await axios.get(imageUrl, { responseType: 'arraybuffer' }); const filePath = path.join(this.saveDirectory, filename); // 根據輸出格式處理圖片 const normalizedFormat = outputFormat.toLowerCase() === 'jpeg' ? 'jpg' : outputFormat.toLowerCase(); if (normalizedFormat === 'jpg' || normalizedFormat === 'jpeg') { // 轉換為 JPG 格式 await sharp(Buffer.from(response.data)) .jpeg({ quality: 90 }) .toFile(filePath); } else if (normalizedFormat === 'png') { // 保持 PNG 格式 await sharp(Buffer.from(response.data)) .png() .toFile(filePath); } else if (normalizedFormat === 'webp') { // 轉換為 WebP 格式 await sharp(Buffer.from(response.data)) .webp({ quality: 90 }) .toFile(filePath); } else { // 預設轉換為 JPG await sharp(Buffer.from(response.data)) .jpeg({ quality: 90 }) .toFile(filePath); } return filePath; } catch (error) { throw new Error(`圖片下載或轉換失敗: ${error instanceof Error ? error.message : String(error)}`); } } async saveBase64Image(base64Data, filename, outputFormat = 'jpg') { try { // 解析 data URI 格式：data:image/png;base64,iVBORw0KGgoAAAANSU... const matches = base64Data.match(/^data:image\/([a-zA-Z]+);base64,(.+)$/); if (!matches) { throw new Error('無效的 base64 圖片格式'); } const [, , base64String] = matches; const buffer = Buffer.from(base64String, 'base64'); const filePath = path.join(this.saveDirectory, filename); // 根據輸出格式處理圖片 const normalizedFormat = outputFormat.toLowerCase() === 'jpeg' ? 'jpg' : outputFormat.toLowerCase(); if (normalizedFormat === 'jpg' || normalizedFormat === 'jpeg') { // 轉換為 JPG 格式 await sharp(buffer) .jpeg({ quality: 90 }) .toFile(filePath); } else if (normalizedFormat === 'png') { // 保持 PNG 格式 await sharp(buffer) .png() .toFile(filePath); } else if (normalizedFormat === 'webp') { // 轉換為 WebP 格式 await sharp(buffer) .webp({ quality: 90 }) .toFile(filePath); } else { // 預設轉換為 JPG await sharp(buffer) .jpeg({ quality: 90 }) .toFile(filePath); } return filePath; } catch (error) { throw new Error(`Base64 圖片保存失敗: ${error instanceof Error ? error.message : String(error)}`); } } extractImageUrlFromContent(content) { // 嘗試從回應中提取圖片 URL const urlPatterns = [ /https?:\/\/[^\s\)]+\.(?:jpg|jpeg|png|gif|webp)/gi, /!\[.*?\]\((https?:\/\/[^\)]+)\)/gi, /https?:\/\/[^\s\)]+/gi ]; for (const pattern of urlPatterns) { const matches = content.match(pattern); if (matches && matches.length > 0) { const url = matches[0].replace(/^\!\[.*?\]\(/, '').replace(/\)$/, ''); if (url.match(/\.(jpg|jpeg|png|gif|webp)(\?.*)?$/i)) { return url; } } } return null; } extractMultipleImageUrls(content) { // 提取所有圖片 URL const urlPatterns = [ /https?:\/\/[^\s\)]+\.(?:jpg|jpeg|png|gif|webp)/gi, /!\[.*?\]\((https?:\/\/[^\)]+)\)/gi, /https?:\/\/[^\s\)]+/gi ]; const urls = []; for (const pattern of urlPatterns) { const matches = content.match(pattern); if (matches) { for (const match of matches) { const url = match.replace(/^\!\[.*?\]\(/, '').replace(/\)$/, ''); if (url.match(/\.(jpg|jpeg|png|gif|webp)(\?.*)?$/i)) { urls.push(url); } } } } // 去重並返回 return [...new Set(urls)]; } extractBase64Images(content) { // 提取所有 base64 圖片 const base64Pattern = /data:image\/[a-zA-Z]+;base64,[A-Za-z0-9+/=]+/gi; const matches = content.match(base64Pattern); return matches || []; } async processImageContent(content, prompt, outputFormat = 'jpg') { const imageUrls = this.extractMultipleImageUrls(content); const base64Images = this.extractBase64Images(content); const savedImages = []; // 處理 base64 圖片 if (base64Images.length > 0) { for (let i = 0; i < base64Images.length; i++) { try { const filename = this.generateFilename(`${prompt}-${i + 1}`, outputFormat); const savedPath = await this.saveBase64Image(base64Images[i], filename, outputFormat); savedImages.push(savedPath); } catch (error) { console.error(`保存第 ${i + 1} 張 base64 圖片失敗:`, error); } } } return { imageUrls, savedImages, hasBase64: base64Images.length > 0 }; } // 阿里雲 DashScope 創建任務 async createDashScopeTask(prompt, negativePrompt, options) { if (!this.dashScopeApiKey) { throw new Error("ALI_API_KEY 或 DASHSCOPE_API_KEY 環境變數未設定"); } const requestData = { model: options?.model || "wanx2.1-t2i-turbo", input: { prompt, negative_prompt: negativePrompt || "人物", }, parameters: { size: options?.size || "1024*1024", n: options?.n || 1, seed: options?.seed, prompt_extend: options?.promptExtend !== false, watermark: options?.watermark || false, }, }; try { const response = await axios.post("https://dashscope.aliyuncs.com/api/v1/services/aigc/text2image/image-synthesis", requestData, { headers: { "Content-Type": "application/json", "Authorization": `Bearer ${this.dashScopeApiKey}`, "X-DashScope-Async": "enable", }, }); return response.data; } catch (error) { if (axios.isAxiosError(error)) { const axiosError = error; const errorMessage = axiosError.response?.data?.message || axiosError.message; throw new Error(`阿里雲 DashScope 創建任務失敗: ${errorMessage}`); } throw new Error(`創建任務時發生未知錯誤: ${error}`); } } // 阿里雲 DashScope 查詢任務結果 async queryDashScopeTask(taskId) { if (!this.dashScopeApiKey) { throw new Error("ALI_API_KEY 或 DASHSCOPE_API_KEY 環境變數未設定"); } try { const response = await axios.get(`https://dashscope.aliyuncs.com/api/v1/tasks/${taskId}`, { headers: { "Authorization": `Bearer ${this.dashScopeApiKey}`, }, }); return response.data; } catch (error) { if (axios.isAxiosError(error)) { const axiosError = error; const errorMessage = axiosError.response?.data?.message || axiosError.message; throw new Error(`阿里雲 DashScope 查詢任務失敗: ${errorMessage}`); } throw new Error(`查詢任務時發生未知錯誤: ${error}`); } } // 阿里雲 DashScope 完整生圖流程 async generateImageWithDashScope(prompt, negativePrompt, options) { // 步驟1: 創建任務 const createResponse = await this.createDashScopeTask(prompt, negativePrompt, options); const taskId = createResponse.output.task_id; if (!taskId) { throw new Error("創建任務失敗，未獲取到任務ID"); } // 步驟2: 輪詢查詢結果 const maxWaitMinutes = options?.maxWaitMinutes || 5; // 預設最多等待5分鐘 const maxAttempts = maxWaitMinutes * 6; // 每10秒查詢一次 let attempts = 0; while (attempts < maxAttempts) { const queryResponse = await this.queryDashScopeTask(taskId); const taskStatus = queryResponse.output.task_status; if (taskStatus === "SUCCEEDED") { const results = queryResponse.output.results || []; return results; } else if (taskStatus === "FAILED") { throw new Error(`圖片生成失敗: ${queryResponse.output.message || "未知錯誤"}`); } else if (taskStatus === "CANCELED") { throw new Error("任務已被取消"); } // 等待10秒後再次查詢 await new Promise(resolve => setTimeout(resolve, 10000)); attempts++; } throw new Error(`圖片生成超時，等待時間超過 ${maxWaitMinutes} 分鐘`); } // 阿里雲 DashScope 視頻生成創建任務 async createVideoGenerationTask(imgUrl, prompt, options) { if (!this.dashScopeApiKey) { throw new Error("ALI_API_KEY 或 DASHSCOPE_API_KEY 環境變數未設定"); } const requestData = { model: options?.model || "wanx2.1-i2v-turbo", input: { img_url: imgUrl, prompt: prompt, template: options?.template, }, parameters: { resolution: options?.resolution || "720P", duration: options?.duration || 5, prompt_extend: options?.promptExtend !== false, seed: options?.seed, }, }; try { const response = await axios.post("https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis", requestData, { headers: { "Content-Type": "application/json", "Authorization": `Bearer ${this.dashScopeApiKey}`, "X-DashScope-Async": "enable", }, }); return response.data; } catch (error) { if (axios.isAxiosError(error)) { const axiosError = error; const errorMessage = axiosError.response?.data?.message || axiosError.message; throw new Error(`阿里雲 DashScope 視頻生成任務創建失敗: ${errorMessage}`); } throw new Error(`創建視頻生成任務時發生未知錯誤: ${error}`); } } // 阿里雲 DashScope 視頻生成完整流程 async generateVideoWithDashScope(imgUrl, prompt, options) { // 步驟1: 創建任務 const createResponse = await this.createVideoGenerationTask(imgUrl, prompt, options); const taskId = createResponse.output.task_id; if (!taskId) { throw new Error("創建視頻生成任務失敗，未獲取到任務ID"); } // 步驟2: 輪詢查詢結果 const maxWaitMinutes = options?.maxWaitMinutes || 15; // 預設最多等待15分鐘（視頻生成較慢） const maxAttempts = maxWaitMinutes * 6; // 每10秒查詢一次 let attempts = 0; while (attempts < maxAttempts) { const queryResponse = await this.queryDashScopeTask(taskId); const taskStatus = queryResponse.output.task_status; if (taskStatus === "SUCCEEDED") { const videoUrl = queryResponse.output.video_url; if (!videoUrl) { throw new Error("任務完成但未獲取到視頻URL"); } return videoUrl; } else if (taskStatus === "FAILED") { throw new Error(`視頻生成失敗: ${queryResponse.output.message || "未知錯誤"}`); } else if (taskStatus === "CANCELED") { throw new Error("視頻生成任務已被取消"); } // 等待10秒後再次查詢 await new Promise(resolve => setTimeout(resolve, 10000)); attempts++; } throw new Error(`視頻生成超時，等待時間超過 ${maxWaitMinutes} 分鐘`); } setupToolHandlers() { // 列出可用工具 this.server.setRequestHandler(ListToolsRequestSchema, async () => { return { tools: [ { name: "generate_image", description: "統一圖像生成工具，支援通義萬相和老張API多種模型", inputSchema: { type: "object", properties: { prompt: { type: "string", description: "圖像生成的提示詞", }, model: { type: "string", description: "選擇圖像生成模型", enum: [ // 通義萬相模型 "wanx2.1-t2i-turbo", "wanx2.1-t2i-plus", "wanx2.0-t2i-turbo", // 老張API模型 "sora_image", "gpt-4o-image", "gemini-2.5-flash-image-preview" ], default: "gemini-2.5-flash-image-preview", }, negative_prompt: { type: "string", description: "反向提示詞，僅通義萬相模型支援（可選，預設為「人物」）", default: "人物", }, size: { type: "string", description: "輸出圖像的分辨率，僅通義萬相模型支援（可選，預設為 1024*1024）", default: "1024*1024", }, n: { type: "number", description: "生成圖片的數量，僅通義萬相模型支援（可選，範圍 1-4，預設為 1）", minimum: 1, maximum: 4, default: 1, }, seed: { type: "number", description: "隨機數種子，僅通義萬相模型支援（可選）", minimum: 0, maximum: 2147483647, }, prompt_extend: { type: "boolean", description: "是否開啟 prompt 智能改寫，僅通義萬相模型支援（可選，預設為 true）", default: true, }, watermark: { type: "boolean", description: "是否添加水印標識，僅通義萬相模型支援（可選，預設為 false）", default: false, }, output_format: { type: "string", description: "輸出圖片格式（可選，預設為 jpg）", enum: ["jpg", "jpeg", "png", "webp"], default: "jpg", }, system_prompt: { type: "string", description: "系統提示詞，僅老張API模型支援（可選）", }, aspect_ratio: { type: "string", description: "圖片比例，僅老張API模型支援（可選），格式如：3:2, 16:9, 1:1", }, max_wait_minutes: { type: "number", description: "最大等待時間（分鐘，可選，通義萬相預設5分鐘，老張API即時回應）", minimum: 1, maximum: 10, default: 5, }, }, required: ["prompt"], }, }, { name: "tongyi_wanxiang_generate_video", description: "使用通義萬相圖生視頻完整流程（創建任務 + 等待完成 + 返回視頻URL）", inputSchema: { type: "object", properties: { img_url: { type: "string", description: "首幀圖像的 URL（必需，需為公網可訪問地址）", }, prompt: { type: "string", description: "文本提示詞，支持中英文，長度不超過800字符（可選）", }, model: { type: "string", description: "模型名稱 - turbo生成快(3-5分鐘)，plus品質高(7-10分鐘)", enum: ["wanx2.1-i2v-turbo", "wanx2.1-i2v-plus"], default: "wanx2.1-i2v-turbo", }, template: { type: "string", description: "視頻特效模板（可選）：squish（解壓捏捏）、flying（魔法懸浮）、carousel（時光木馬）", enum: ["squish", "flying", "carousel"], }, resolution: { type: "string", description: "視頻分辨率檔位", enum: ["480P", "720P"], default: "720P", }, duration: { type: "number", description: "視頻時長（秒）- turbo模型支持3-5秒，plus模型固定5秒", minimum: 3, maximum: 5, default: 5, }, prompt_extend: { type: "boolean", description: "是否開啟 prompt 智能改寫", default: true, }, seed: { type: "number", description: "隨機數種子", minimum: 0, maximum: 2147483647, }, max_wait_minutes: { type: "number", description: "最大等待時間（分鐘，預設15分鐘）", minimum: 5, maximum: 30, default: 15, }, }, required: ["img_url"], }, }, { name: "chat_completion", description: "使用 AI 進行對話", inputSchema: { type: "object", properties: { message: { type: "string", description: "用戶訊息", }, system_prompt: { type: "string", description: "系統提示詞（可選，如未提供將使用預設值）", }, model: { type: "string", description: "使用的模型名稱", default: "gpt-4", }, }, required: ["message"], }, }, { name: "get_usage_guide", description: "獲取工具使用指南，包含所有可用模型的詳細說明", inputSchema: { type: "object", properties: {}, required: [], }, }, ], }; }); // 處理工具調用 this.server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; try { switch (name) { case "generate_image": return await this.handleUnifiedImageGeneration(args); case "tongyi_wanxiang_generate_video": return await this.handleTongyiWanxiangGenerateVideo(args); case "chat_completion": return await this.handleChatCompletion(args); case "get_usage_guide": return await this.handleUsageGuide(args); // 保持向後兼容 case "tongyi_wanxiang_generate_image": return await this.handleTongyiWanxiangGenerateImage(args); case "tongyi_wanxiang_create_task": case "dashscope_create_task": return await this.handleTongyiWanxiangCreateTask(args); case "tongyi_wanxiang_query_task": case "dashscope_query_task": return await this.handleTongyiWanxiangQueryTask(args); case "dashscope_generate_image": return await this.handleTongyiWanxiangGenerateImage(args); case "tongyi_wanxiang_create_video_task": return await this.handleTongyiWanxiangCreateVideoTask(args); default: throw new McpError(ErrorCode.MethodNotFound, `未知的工具: ${name}`); } } catch (error) { if (error instanceof McpError) { throw error; } throw new McpError(ErrorCode.InternalError, `工具執行錯誤: ${error instanceof Error ? error.message : String(error)}`); } }); } async handleImageGeneration(args) { const { prompt, system_prompt, model = "gpt-4o-image", output_format = "jpg", n = 1, aspect_ratio } = args; if (!prompt) { throw new McpError(ErrorCode.InvalidParams, "需要提供 prompt 參數"); } // 模型資訊 const modelInfo = { 'sora_image': { name: 'Sora圖像生成', provider: '老張API', feature: '基於Sora技術的圖像生成，$0.01/次' }, 'gpt-4o-image': { name: 'GPT-4o圖像生成', provider: '老張API', feature: 'GPT-4o視覺模型，$0.01/次' }, 'gemini-2.5-flash-image-preview': { name: 'Gemini 2.5 Flash圖像生成', provider: '老張API', feature: 'Google Gemini 2.5 Flash預覽版，高速生成，$0.01/次' } }; const modelDesc = modelInfo[model]; // 根據老張API文檔格式，構建正確的請求 let finalPrompt = prompt; // 如果有比例要求，添加到提示詞末尾（如：【3:2】） if (aspect_ratio) { finalPrompt = `${prompt}【${aspect_ratio}】`; } const messages = [ { role: "user", content: [ { type: "text", text: finalPrompt } ] } ]; const requestData = { model, n: Math.min(Math.max(n, 1), 4), // 限制在1-4之間 messages }; try { const response = await axios.post(`${this.baseUrl}/chat/completions`, requestData, { headers: { "Content-Type": "application/json", "Authorization": `Bearer ${this.apiKey}`, }, }); const result = response.data; const generatedContent = result.choices?.[0]?.message?.content || "未生成內容"; // 使用新的圖片處理邏輯，同時支援 URL 和 base64 const imageData = await this.processImageContent(generatedContent, prompt, output_format); let responseText = `${modelDesc.name}圖像生成完成！\n\n`; responseText += `🎨 使用模型: ${modelDesc.name}\n`; responseText += `💰 費用: ${modelDesc.feature}\n`; responseText += `📝 提示詞: ${finalPrompt}\n`; responseText += `🖼️ 生成數量: ${n}張\n`; if (system_prompt) { responseText += `🔧 系統提示: ${system_prompt}\n`; } // 優先顯示 base64 圖片的保存結果 if (imageData.savedImages.length > 0) { responseText += `\n💾 圖片已保存到 ${this.formatSaveDirectoryForDisplay()}:\n`; imageData.savedImages.forEach((filePath, index) => { const displayPath = this.formatFilePathForDisplay(filePath); const filename = path.basename(filePath); responseText += `${index + 1}. ${filename}\n 完整路徑: ${displayPath}\n`; }); } // 如果有 URL 圖片，也顯示 if (imageData.imageUrls.length > 0) { responseText += `\n🔗 圖片URLs:\n`; imageData.imageUrls.forEach((url, index) => { responseText += `${index + 1}. ${url}\n`; }); } // 如果沒有找到任何圖片（URL 或 base64），顯示原始回應 if (!imageData.hasBase64 && imageData.imageUrls.length === 0) { responseText += `\n📄 API回應內容:\n${generatedContent}`; } return { content: [ { type: "text", text: responseText, }, ], }; } catch (error) { if (axios.isAxiosError(error)) { const axiosError = error; const errorMessage = axiosError.response?.data?.error?.message || axiosError.message; // 根據文檔建議，如果sora_image失敗，建議切換到其他模型 let suggestionText = ""; if (model === "sora_image") { suggestionText = "\n💡 提示：如果sora_image持續失敗，建議切換到gpt-4o-image或gemini-2.5-flash-image-preview模型重試"; } throw new McpError(ErrorCode.InternalError, `老張API請求失敗: ${errorMessage}${suggestionText}`); } throw new McpError(ErrorCode.InternalError, `未知錯誤: ${error instanceof Error ? error.message : String(error)}`); } } async handleTongyiWanxiangCreateTask(args) { const { prompt, model, negative_prompt, size, n, seed, prompt_extend, watermark } = args; if (!prompt) { throw new McpError(ErrorCode.InvalidParams, "需要提供 prompt 參數"); } try { const response = await this.createDashScopeTask(prompt, negative_prompt, { model, size, n, seed, promptExtend: prompt_extend, watermark, }); return { content: [ { type: "text", text: `通義萬相文生圖任務創建成功！\n\n任務ID: ${response.output.task_id}\n任務狀態: ${response.output.task_status}\n使用模型: ${model || 'wanx2.1-t2i-turbo'}\n請求ID: ${response.request_id}\n\n請使用 tongyi_wanxiang_query_task 工具查詢任務結果。`, }, ], }; } catch (error) { throw new McpError(ErrorCode.InternalError, `創建任務失敗: ${error instanceof Error ? error.message : String(error)}`); } } async handleTongyiWanxiangQueryTask(args) { const { task_id } = args; if (!task_id) { throw new McpError(ErrorCode.InvalidParams, "需要提供 task_id 參數"); } try { const response = await this.queryDashScopeTask(task_id); const { output } = response; let responseText = `通義萬相文生圖任務查詢結果：\n\n`; responseText += `任務ID: ${output.task_id}\n`; responseText += `任務狀態: ${output.task_status}\n`; responseText += `請求ID: ${response.request_id}\n`; if (output.submit_time) { responseText += `提交時間: ${output.submit_time}\n`; } if (output.scheduled_time) { responseText += `開始時間: ${output.scheduled_time}\n`; } if (output.end_time) { responseText += `完成時間: ${output.end_time}\n`; } if (output.task_status === "SUCCEEDED" && output.results) { responseText += `\n✅ 任務完成！生成了 ${output.results.length} 張圖片：\n`; output.results.forEach((result, index) => { responseText += `\n圖片 ${index + 1}:\n`; responseText += `原始提示詞: ${result.orig_prompt}\n`; if (result.actual_prompt) { responseText += `優化後提示詞: ${result.actual_prompt}\n`; } responseText += `圖片URL: ${result.url}\n`; }); if (output.task_metrics) { responseText += `\n📊 任務統計:\n`; responseText += `總計: ${output.task_metrics.TOTAL}\n`; responseText += `成功: ${output.task_metrics.SUCCEEDED}\n`; responseText += `失敗: ${output.task_metrics.FAILED}\n`; } } else if (output.task_status === "FAILED") { responseText += `\n❌ 任務失敗\n`; if (output.message) { responseText += `錯誤訊息: ${output.message}\n`; } } else if (output.task_status === "PENDING") { responseText += `\n⏳ 任務排隊中，請稍後再次查詢\n`; } else if (output.task_status === "RUNNING") { responseText += `\n🔄 任務處理中，請稍後再次查詢\n`; } return { content: [ { type: "text", text: responseText, }, ], }; } catch (error) { throw new McpError(ErrorCode.InternalError, `查詢任務失敗: ${error instanceof Error ? error.message : String(error)}`); } } async handleTongyiWanxiangGenerateImage(args) { const { prompt, model, negative_prompt, size, n, seed, prompt_extend, watermark, output_format, max_wait_minutes } = args; if (!prompt) { throw new McpError(ErrorCode.InvalidParams, "需要提供 prompt 參數"); } // 模型資訊 const modelInfo = { 'wanx2.1-t2i-turbo': { name: '通義萬相2.1-Turbo', price: '0.14元/張', feature: '生成速度更快' }, 'wanx2.1-t2i-plus': { name: '通義萬相2.1-Plus', price: '0.20元/張', feature: '圖像細節更豐富' }, 'wanx2.0-t2i-turbo': { name: '通義萬相2.0-Turbo', price: '0.04元/張', feature: '質感人像與創意設計' } }; const selectedModel = model || 'wanx2.1-t2i-turbo'; const modelDesc = modelInfo[selectedModel]; try { const results = await this.generateImageWithDashScope(prompt, negative_prompt, { model: selectedModel, size, n, seed, promptExtend: prompt_extend, watermark, outputFormat: output_format, maxWaitMinutes: max_wait_minutes, }); let responseText = `通義萬相文生圖像生成完成！\n\n`; responseText += `🎨 使用模型: ${modelDesc.name} (${modelDesc.feature})\n`; responseText += `💰 計費: ${modelDesc.price}\n`; responseText += `✅ 成功生成 ${results.length} 張圖片\n\n`; results.forEach((result, index) => { responseText += `圖片 ${index + 1}:\n`; responseText += `原始提示詞: ${result.orig_prompt}\n`; if (result.actual_prompt) { responseText += `優化後提示詞: ${result.actual_prompt}\n`; } responseText += `圖片URL: ${result.url}\n\n`; }); return { content: [ { type: "text", text: responseText, }, ], }; } catch (error) { throw new McpError(ErrorCode.InternalError, `生成圖片失敗: ${error instanceof Error ? error.message : String(error)}`); } } async handleTongyiWanxiangCreateVideoTask(args) { const { img_url, prompt, model, template, resolution, duration, prompt_extend, seed } = args; if (!img_url) { throw new McpError(ErrorCode.InvalidParams, "需要提供 img_url 參數"); } try { const response = await this.createVideoGenerationTask(img_url, prompt, { model, template, resolution, duration, promptExtend: prompt_extend, seed, }); return { content: [ { type: "text", text: `通義萬相圖生視頻任務創建成功！\n\n任務ID: ${response.output.task_id}\n任務狀態: ${response.output.task_status}\n使用模型: ${model || 'wanx2.1-i2v-turbo'}\n首幀圖像: ${img_url}\n請求ID: ${response.request_id}\n\n請使用 tongyi_wanxiang_query_task 工具查詢任務結果。`, }, ], }; } catch (error) { throw new McpError(ErrorCode.InternalError, `創建視頻生成任務失敗: ${error instanceof Error ? error.message : String(error)}`); } } async handleTongyiWanxiangGenerateVideo(args) { const { img_url, prompt, model, template, resolution, duration, prompt_extend, seed, max_wait_minutes } = args; if (!img_url) { throw new McpError(ErrorCode.InvalidParams, "需要提供 img_url 參數"); } // 模型資訊 const modelInfo = { 'wanx2.1-i2v-turbo': { name: '通義萬相2.1圖生視頻-Turbo', time: '3-5分鐘', feature: '生成速度快' }, 'wanx2.1-i2v-plus': { name: '通義萬相2.1圖生視頻-Plus', time: '7-10分鐘', feature: '視頻品質高' } }; const selectedModel = model || 'wanx2.1-i2v-turbo'; const modelDesc = modelInfo[selectedModel]; try { const videoUrl = await this.generateVideoWithDashScope(img_url, prompt, { model: selectedModel, template, resolution, duration, promptExtend: prompt_extend, seed, maxWaitMinutes: max_wait_minutes, }); let responseText = `通義萬相圖生視頻生成完成！\n\n`; responseText += `🎬 使用模型: ${modelDesc.name} (${modelDesc.feature})\n`; responseText += `⏱️ 預計生成時間: ${modelDesc.time}\n`; responseText += `🖼️ 首幀圖像: ${img_url}\n`; responseText += `📐 分辨率: ${resolution || '720P'}\n`; responseText += `⏰ 視頻時長: ${duration || 5}秒\n`; if (prompt) { responseText += `💭 提示詞: ${prompt}\n`; } if (template) { responseText += `✨ 特效模板: ${template}\n`; } responseText += `\n✅ 視頻生成成功！\n`; responseText += `🔗 視頻URL: ${videoUrl}\n`; responseText += `\n⚠️ 視頻URL有效期24小時，請及時保存`; return { content: [ { type: "text", text: responseText, }, ], }; } catch (error) { throw new McpError(ErrorCode.InternalError, `視頻生成失敗: ${error instanceof Error ? error.message : String(error)}`); } } async handleChatCompletion(args) { const { message, system_prompt = "You are a helpful assistant.", model = "gpt-4" } = args; if (!message) { throw new McpError(ErrorCode.InvalidParams, "需要提供 message 參數"); } const messages = [ { role: "system", content: system_prompt }, { role: "user", content: message }, ]; const requestData = { model, messages, }; try { const response = await axios.post(`${this.baseUrl}/chat/completions`, requestData, { headers: { "Content-Type": "application/json", "Authorization": `Bearer ${this.apiKey}`, }, }); const result = response.data; const assistantMessage = result.choices[0]?.message?.content || "未收到回覆"; return { content: [ { type: "text", text: `助手回覆:\n\n${assistantMessage}`, }, ], }; } catch (error) { if (axios.isAxiosError(error)) { const axiosError = error; const errorMessage = axiosError.response?.data?.error?.message || axiosError.message; throw new McpError(ErrorCode.InternalError, `API 請求失敗: ${errorMessage}`); } throw new McpError(ErrorCode.InternalError, `未知錯誤: ${error instanceof Error ? error.message : String(error)}`); } } async handleUnifiedImageGeneration(args) { const { prompt, model = "gemini-2.5-flash-image-preview", negative_prompt, size, n, seed, prompt_extend, watermark, output_format = "jpg", system_prompt, aspect_ratio, max_wait_minutes } = args; if (!prompt) { throw new McpError(ErrorCode.InvalidParams, "需要提供 prompt 參數"); } // 判斷是通義萬相模型還是老張API模型 const tongyiModels = ["wanx2.1-t2i-turbo", "wanx2.1-t2i-plus", "wanx2.0-t2i-turbo"]; const laozhangModels = ["sora_image", "gpt-4o-image", "gemini-2.5-flash-image-preview"]; if (tongyiModels.includes(model)) { // 使用通義萬相API return await this.handleTongyiWanxiangGenerateImage({ prompt, model, negative_prompt, size, n, seed, prompt_extend, watermark, output_format, max_wait_minutes }); } else if (laozhangModels.includes(model)) { // 使用老張API return await this.handleImageGeneration({ prompt, model, system_prompt, output_format, n, aspect_ratio }); } else { throw new McpError(ErrorCode.InvalidParams, `不支援的模型: ${model}`); } } async handleUsageGuide(args) { const guideText = ` # AI Image Chat MCP 工具使用指南 **版本**: v${packageJson.version} **最後更新**: ${new Date().toLocaleDateString('zh-TW')} **支援模型**: 6 種圖像生成模型 + 2 種視頻生成模型 ## 🎨 統一圖像生成工具 ### generate_image **功能**: 統一圖像生成工具，支援通義萬相和老張API多種模型 **可用模型**: ### 通義萬相模型（阿里雲DashScope） 1. **wanx2.1-t2i-turbo** - 提供商: 阿里雲 - 價格: 0.14元/張 - 特點: 生成速度更快 - 生成時間: 1-3分鐘 - 適合: 快速原型和測試 2. **wanx2.1-t2i-plus** - 提供商: 阿里雲 - 價格: 0.20元/張 - 特點: 圖像細節更豐富 - 生成時間: 1-3分鐘 - 適合: 高品質作品和商業用途 3. **wanx2.0-t2i-turbo** - 提供商: 阿里雲 - 價格: 0.04元/張 - 特點: 質感人像與創意設計 - 生成時間: 1-3分鐘 - 適合: 成本控制和批量生成 ### 老張API模型（即時回應，$0.01/次） 4. **gemini-2.5-flash-image-preview** ⭐ (預設) - 提供商: 老張API - 特點: Google Gemini 2.5 Flash預覽版，高速生成 - 生成時間: 即時回應 - 適合: 快速原型設計、高效批量生成、首選模型 5. **sora_image** - 提供商: 老張API - 特點: 基於Sora技術的圖像生成 - 生成時間: 即時回應 - 適合: 快速圖像生成、創意探索 6. **gpt-4o-image** - 提供商: 老張API - 特點: GPT-4o視覺模型 - 生成時間: 即時回應 - 適合: 智能圖像理解和生成 **主要參數**: - \`prompt\`: 圖像生成提示詞 (必需) - \`model\`: 模型選擇 (可選，預設: "gemini-2.5-flash-image-preview") **通義萬相模型專用參數**: - \`negative_prompt\`: 反向提示詞 (可選，預設: "人物") - \`size\`: 圖像尺寸 (可選，預設: "1024*1024") - \`n\`: 生成數量 (可選，範圍: 1-4，預設: 1) - \`seed\`: 隨機種子 (可選) - \`prompt_extend\`: 智能改寫 (可選，預設: true) - \`watermark\`: 添加水印 (可選，預設: false) - \`max_wait_minutes\`: 最大等待時間 (可選，預設: 5分鐘) **老張API模型專用參數**: - \`system_prompt\`: 系統提示詞 (可選) - \`output_format\`: 圖片格式 (可選，預設: "jpg") --- ## 🎬 視頻生成工具 ### tongyi_wanxiang_generate_video **功能**: 使用通義萬相圖生視頻完整流程（創建任務 + 等待完成 + 返回視頻URL） **可用模型**: 1. **wanx2.1-i2v-turbo** (預設) - 生成時間: 3-5分鐘 - 特點: 生成速度快 - 支援時長: 3-5秒 - 支援分辨率: 480P, 720P 2. **wanx2.1-i2v-plus** - 生成時間: 7-10分鐘 - 特點: 視頻品質高 - 支援時長: 固定5秒 - 支援分辨率: 僅720P **主要參數**: - \`img_url\`: 首幀圖像URL (必需，需為公網可訪問地址) - \`prompt\`: 文本提示詞 (可選，長度不超過800字符) - \`model\`: 模型選擇 (可選) - \`template\`: 視頻特效模板 (可選) - \`resolution\`: 分辨率檔位 (可選，預設: "720P") - \`duration\`: 視頻時長 (可選，預設: 5秒) - \`prompt_extend\`: 智能改寫 (可選，預設: true) - \`seed\`: 隨機種子 (可選) - \`max_wait_minutes\`: 最大等待時間 (可選，預設: 15分鐘) **視頻特效模板**: - \`squish\`: 解壓捏捏效果 - \`flying\`: 魔法懸浮效果 - \`carousel\`: 時光木馬效果 **圖像要求**: - 格式: JPEG、JPG、PNG、BMP、WEBP - 尺寸: 360-2000像素（寬度和高度） - 檔案大小: 不超過10MB - URL: 必須為公網可訪問地址 --- ## 💬 AI 對話工具 ### chat_completion **功能**: 使用 AI 進行對話 **主要參數**: - \`message\`: 用戶訊息 (必需) - \`system_prompt\`: 系統提示詞 (可選) - \`model\`: 模型名稱 (可選，預設: "gpt-4") --- ## 📋 使用範例 ### 📋 圖像生成模型詳細調用範例 #### 1. gemini-2.5-flash-image-preview (預設) - 高速生成 **特點**: Gemini 2.5 Flash預覽版 | **價格**: $0.01/次 | **時間**: 即時回應 \`\`\`json { "name": "generate_image", "arguments": { "prompt": "一隻可愛的橘貓在陽光下睡覺", "model": "gemini-2.5-flash-image-preview", "aspect_ratio": "16:9", "n": 2 } } \`\`\` #### 2. wanx2.1-t2i-turbo - 速度優先 **特點**: 生成速度快 | **價格**: 0.14元/張 | **時間**: 1-3分鐘 \`\`\`json { "name": "generate_image", "arguments": { "prompt": "一隻可愛的橘貓在陽光下睡覺", "model": "wanx2.1-t2i-turbo", "size": "1024*1024", "n": 1, "negative_prompt": "人物，文字，低質量", "prompt_extend": true, "watermark": false, "max_wait_minutes": 5 } } \`\`\` #### 2. wanx2.1-t2i-turbo - 速度優先 **特點**: 生成速度快 | **價格**: 0.14元/張 | **時間**: 1-3分鐘 \`\`\`json { "name": "generate_image", "arguments": { "prompt": "一隻可愛的橘貓在陽光下睡覺", "model": "wanx2.1-t2i-turbo", "size": "1024*1024", "n": 1, "negative_prompt": "人物，文字，低質量", "prompt_extend": true, "watermark": false, "max_wait_minutes": 5 } } \`\`\` #### 3. wanx2.1-t2i-plus - 品質優先 **特點**: 圖像細節豐富 | **價格**: 0.20元/張 | **時間**: 1-3分鐘 \`\`\`json { "name": "generate_image", "arguments": { "prompt": "精美的日式庭院，櫻花飄落，寧靜祥和", "model": "wanx2.1-t2i-plus", "size": "1024*1024", "n": 2, "negative_prompt": "人物，文字，模糊", "seed": 12345, "prompt_extend": true, "output_format": "jpg" } } \`\`\` #### 4. wanx2.0-t2i-turbo - 性價比首選 **特點**: 質感人像與創意設計 | **價格**: 0.04元/張 | **時間**: 1-3分鐘 \`\`\`json { "name": "generate_image", "arguments": { "prompt": "時尚的年輕女性肖像，現代都市背景", "model": "wanx2.0-t2i-turbo", "size": "1024*1024", "n": 4, "negative_prompt": "醜陋，變形，低分辨率", "max_wait_minutes": 3 } } \`\`\` #### 5. sora_image - 即時生成 **特點**: 基於Sora技術 | **價格**: $0.01/次 | **時間**: 即時回應 \`\`\`json { "name": "generate_image", "arguments": { "prompt": "夢幻般的未來城市，科技感十足，霓虹燈光", "model": "sora_image", "n": 4, "aspect_ratio": "16:9" } } \`\`\` #### 6. gpt-4o-image - 智能生成 **特點**: GPT-4o視覺模型 | **價格**: $0.01/次 | **時間**: 即時回應 \`\`\`json { "name": "generate_image", "arguments": { "prompt": "可愛的卡通動物們在森林裡開派對", "model": "gpt-4o-image", "n": 2, "aspect_ratio": "3:2", "system_prompt": "Create a colorful, family-friendly cartoon style image" } } \`\`\` **特點**: Gemini 2.5 Flash預覽版 | **價格**: $0.01/次 | **時間**: 即時回應 \`\`\`json { "name": "generate_image", "arguments": { "prompt": "賽博朋克風格的東京街道，雨夜霓虹", "model": "gemini-2.5-flash-image-preview", "n": 3, "aspect_ratio": "16:9" } } \`\`\` #### 6. 老張API比例控制範例 **展示不同比例效果** \`\`\`json { "name": "generate_image", "arguments": { "prompt": "一隻貓咪坐在窗台上看風景", "model": "sora_image", "aspect_ratio": "5:3", "n": 1 } } \`\`\` ### 📋 詳細參數說明 #### 🔧 通用參數 (所有模型) - \`**prompt**\` (必需): 圖像生成的描述詞 - \`**model**\` (可選): 選擇生成模型，預設 "gemini-2.5-flash-image-preview" - \`**output_format**\` (可選): 圖片格式 ["jpg", "jpeg", "png", "webp"]，預設 "jpg" #### ⚙️ 通義萬相專用參數 (wanx2.x 模型) - \`**negative_prompt**\` (可選): 反向提示詞，描述不希望出現的內容，預設 "人物" - \`**size**\` (可選): 圖像尺寸，預設 "1024*1024" - 支援: "512*512", "768*768", "1024*1024", "1280*720", "720*1280" - \`**n**\` (可選): 生成圖片數量，範圍 1-4，預設 1 - \`**seed**\` (可選): 隨機種子 (0-2147483647)，用於重現相同結果 - \`**prompt_extend**\` (可選): 智能改寫提示詞，預設 true - \`**watermark**\` (可選): 添加水印標識，預設 false - \`**max_wait_minutes**\` (可選): 最大等待時間 (1-10分鐘)，預設 5分鐘 #### 🤖 老張API專用參數 (sora_image, gpt-4o-image, gemini-2.5-flash-image-preview) - \`**system_prompt**\` (可選): 系統提示詞，用於指導AI生成風格 - \`**aspect_ratio**\` (可選): 圖片比例，格式如 "3:2", "16:9", "1:1" (會添加到提示詞末尾) - \`**n**\` (可選): 生成圖片數量，範圍 1-4，預設 1 (注意：老張API按次計費，每次$0.01) - \`**max_wait_minutes**\` (可選): 最大等待時間，預設 5分鐘 (通常即時回應) ### 💰 模型價格對比表 | 模型 | 提供商 | 價格 | 生成時間 | 適用場景 | |------|---------|------|----------|----------| | **wanx2.0-t2i-turbo** | 阿里雲 | **0.04元/張** | 1-3分鐘 | 💰 批量生成、成本控制 | | **wanx2.1-t2i-turbo** | 阿里雲 | 0.14元/張 | 1-3分鐘 | ⚡ 快速原型、測試 | | **wanx2.1-t2i-plus** | 阿里雲 | 0.20元/張 | 1-3分鐘 | 🎨 高品質、商業用途 | | **sora_image** | 老張API | **$0.01/次** | 即時 | 🚀 即時生成、創意探索 | | **gpt-4o-image** | 老張API | **$0.01/次** | 即時 | 🤖 智能理解、複雜場景 | | **gemini-2.5-flash-image-preview** | 老張API | **$0.01/次** | 即時 | ⚡ 高速生成、高效處理 | ### 💡 老張API使用說明 - **計費方式**: 按次計費，每次調用 $0.01，不論生成幾張圖片 - **比例控制**: 在提示詞末尾自動添加【比例】格式，如【3:2】 - **重試機制**: 如果 sora_image 失敗，建議切換到 gpt-4o-image 或 gemini-2.5-flash-image-preview - **數量控制**: 使用 n 參數控制生成數量 (1-4張) - **格式特點**: 基於 ChatGPT PLUS 用戶的生圖請求模擬 ### 🎯 模型選擇建議 **追求速度**: \`gemini-2.5-flash-image-preview\`, \`sora_image\`, \`gpt-4o-image\` (即時回應) **注重成本**: \`wanx2.0-t2i-turbo\` (最便宜，0.04元/張) **平衡選擇**: \`wanx2.1-t2i-turbo\` (速度與品質平衡) **追求品質**: \`wanx2.1-t2i-plus\` (最高品質) **創意實驗**: \`sora_image\` (Sora技術，獨特風格) ### 視頻生成範例 \`\`\`json { "name": "tongyi_wanxiang_generate_video", "arguments": { "img_url": "https://example.com/cat.jpg", "prompt": "小貓在草地上慢慢伸懶腰", "model": "wanx2.1-i2v-turbo", "duration": 4, "resolution": "720P" } } \`\`\` ### 特效視頻範例 \`\`\`json