@lobehub/chat

Version:

Lobe Chat is an open-source, high-performance chatbot framework that supports speech synthesis, multimodal input, and an extensible Function Call plugin system. It also supports one-click, free deployment of your private ChatGPT/LLM web application.

github.com/lobehub/lobe-chat

lobehub/lobe-chat

272 lines (234 loc) • 8.27 kB

text/typescript

import createDebug from 'debug';

import { CreateImageOptions } from '../../core/openaiCompatibleFactory';
import { CreateImagePayload, CreateImageResponse } from '../../types/image';
import { type TaskResult, asyncifyPolling } from '../../utils/asyncifyPolling';
import { AgentRuntimeError } from '../../utils/createError';

const log = createDebug('lobe-image:qwen');

// DashScope exposes a different endpoint path per capability, so each workflow
// below targets its own URL.
const TEXT2IMAGE_ENDPOINT =
  'https://dashscope.aliyuncs.com/api/v1/services/aigc/text2image/image-synthesis';
const IMAGE_EDIT_ENDPOINT =
  'https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation';
const TASKS_ENDPOINT = 'https://dashscope.aliyuncs.com/api/v1/tasks';

// Size sent to the text-to-image API when the caller specifies none.
const DEFAULT_SIZE = '1024*1024';

/** Response of the asynchronous text-to-image task endpoints (create + query). */
interface QwenImageTaskResponse {
  output: {
    /** Failure code, when the API provides one alongside `message`. */
    code?: string;
    error_message?: string;
    /** Failure reason, when the API reports it here instead of `error_message`. */
    message?: string;
    results?: Array<{
      url: string;
    }>;
    task_id: string;
    task_status: 'PENDING' | 'RUNNING' | 'SUCCEEDED' | 'FAILED';
  };
  request_id: string;
}

/** Response of the synchronous qwen-image-edit multimodal-generation endpoint. */
interface QwenImageEditResponse {
  output: {
    choices: Array<{
      message: {
        content: Array<{
          image: string;
        }>;
      };
    }>;
  };
  request_id: string;
}

/**
 * Build the Error for a non-OK HTTP response.
 *
 * The message is `Failed to <action> (<status>): <detail>`, where `<detail>` is the
 * `message` field of the JSON error body when one can be parsed, otherwise the
 * response's `statusText`.
 *
 * @param action - Human-readable description of the failed operation.
 * @param response - The non-OK fetch response.
 */
async function buildResponseError(action: string, response: Response): Promise<Error> {
  let errorData: { message?: string } | null | undefined;

  try {
    errorData = await response.json();
  } catch {
    // The error body is not JSON; fall back to the HTTP status text below.
  }

  return new Error(
    `Failed to ${action} (${response.status}): ${errorData?.message || response.statusText}`,
  );
}

/**
 * Resolve the `size` parameter in the `<width>*<height>` form sent to the API.
 *
 * Explicit `width`/`height` win over `size`; a `size` such as `1024x1024` is
 * rewritten with `*` separators; otherwise the default size is used.
 */
function resolveSize(params: CreateImagePayload['params']): string {
  if (params.width && params.height) return `${params.width}*${params.height}`;
  if (params.size) return params.size.replaceAll('x', '*');
  return DEFAULT_SIZE;
}

/**
 * Create an image generation task with Qwen API for text-to-image models.
 *
 * @param payload - Model name and generation params (prompt, optional seed and size).
 * @param apiKey - DashScope API key, sent as a Bearer token.
 * @returns The ID of the created asynchronous task.
 * @throws Error when the request fails or the response carries no task ID.
 */
async function createImageTask(payload: CreateImagePayload, apiKey: string): Promise<string> {
  const { model, params } = payload;
  const endpoint = TEXT2IMAGE_ENDPOINT;

  log('Creating image task with model: %s, endpoint: %s', model, endpoint);

  const response = await fetch(endpoint, {
    body: JSON.stringify({
      input: {
        prompt: params.prompt,
        // negativePrompt is not part of standard parameters
        // but can be supported by extending the params type if needed
      },
      model,
      parameters: {
        n: 1,
        ...(typeof params.seed === 'number' ? { seed: params.seed } : {}),
        size: resolveSize(params),
      },
    }),
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
      // Ask DashScope to run the job asynchronously and hand back a task ID.
      'X-DashScope-Async': 'enable',
    },
    method: 'POST',
  });

  if (!response.ok) {
    throw await buildResponseError('create image task', response);
  }

  const data: QwenImageTaskResponse = await response.json();
  const taskId = data?.output?.task_id;

  // Without a task ID there is nothing to poll; fail now instead of querying
  // `/tasks/undefined` until the poller gives up.
  if (!taskId) {
    throw new Error('No task ID returned from Qwen image API');
  }

  log('Task created with ID: %s', taskId);

  return taskId;
}

/**
 * Create image with Qwen image-edit API for image-to-image models.
 * This is a synchronous API that returns the result directly.
 *
 * @param payload - Model name and params; requires `imageUrl` or a non-empty `imageUrls`.
 * @param apiKey - DashScope API key, sent as a Bearer token.
 * @returns The URL of the edited image.
 * @throws Error when no source image is given, the request fails, or the
 *   response contains no image.
 */
async function createImageEdit(
  payload: CreateImagePayload,
  apiKey: string,
): Promise<CreateImageResponse> {
  const { model, params } = payload;
  const endpoint = IMAGE_EDIT_ENDPOINT;

  log('Creating image edit with model: %s, endpoint: %s', model, endpoint);

  // Handle imageUrls to imageUrl conversion: only the first entry is used.
  let imageUrl = params.imageUrl;
  if (!imageUrl && params.imageUrls && params.imageUrls.length > 0) {
    imageUrl = params.imageUrls[0];
    log('Converting imageUrls to imageUrl: using first image %s', imageUrl);
  }

  if (!imageUrl) {
    throw new Error('imageUrl or imageUrls is required for qwen-image-edit model');
  }

  const response = await fetch(endpoint, {
    body: JSON.stringify({
      input: {
        messages: [
          {
            content: [{ image: imageUrl }, { text: params.prompt }],
            role: 'user',
          },
        ],
      },
      model,
      parameters: {
        ...(typeof params.seed === 'number' ? { seed: params.seed } : {}),
      },
    }),
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    method: 'POST',
  });

  if (!response.ok) {
    throw await buildResponseError('create image edit', response);
  }

  const data: QwenImageEditResponse = await response.json();

  // Optional chaining keeps a malformed body on the descriptive error paths
  // below rather than surfacing a bare TypeError.
  const choices = data?.output?.choices;
  if (!choices || choices.length === 0) {
    throw new Error('No image choices returned from qwen-image-edit API');
  }

  const contentItems = choices[0]?.message?.content;
  if (!contentItems || contentItems.length === 0) {
    throw new Error('No image content returned from qwen-image-edit API');
  }

  const imageContent = contentItems.find((content) => 'image' in content);
  if (!imageContent) {
    throw new Error('No image found in response content');
  }

  const resultImageUrl = imageContent.image;
  log('Image edit generated successfully: %s', resultImageUrl);

  return { imageUrl: resultImageUrl };
}

/**
 * Query the status of an image generation task.
 *
 * @param taskId - ID returned when the task was created.
 * @param apiKey - DashScope API key, sent as a Bearer token.
 * @returns The parsed task response.
 * @throws Error when the request fails.
 */
async function queryTaskStatus(taskId: string, apiKey: string): Promise<QwenImageTaskResponse> {
  // Encode the ID so it always stays a single path segment.
  const endpoint = `${TASKS_ENDPOINT}/${encodeURIComponent(taskId)}`;

  log('Querying task status for: %s', taskId);

  const response = await fetch(endpoint, {
    headers: {
      Authorization: `Bearer ${apiKey}`,
    },
  });

  if (!response.ok) {
    throw await buildResponseError('query task status', response);
  }

  return response.json();
}

/**
 * Create image using Qwen API.
 * Supports both text-to-image (async with polling) and image-to-image (sync) workflows.
 *
 * @param payload - Model name and generation params.
 * @param options - Runtime options; `apiKey` and `provider` are read here.
 * @returns The generated image URL.
 * @throws The value produced by `AgentRuntimeError.createImage` with
 *   `errorType: 'ProviderBizError'`, wrapping any failure raised below.
 */
export async function createQwenImage(
  payload: CreateImagePayload,
  options: CreateImageOptions,
): Promise<CreateImageResponse> {
  const { apiKey, provider } = options;
  const { model } = payload;

  try {
    // Check if this is qwen-image-edit model for image-to-image
    if (model === 'qwen-image-edit') {
      log('Using multimodal-generation API for qwen-image-edit model');
      return await createImageEdit(payload, apiKey);
    }

    // Default to text-to-image workflow for other qwen models
    log('Using text2image API for model: %s', model);

    // 1. Create image generation task
    const taskId = await createImageTask(payload, apiKey);

    // 2. Poll task status until completion using asyncifyPolling
    const result = await asyncifyPolling<QwenImageTaskResponse, CreateImageResponse>({
      checkStatus: (taskStatus: QwenImageTaskResponse): TaskResult<CreateImageResponse> => {
        const { output } = taskStatus;
        log('Task %s status: %s', taskId, output.task_status);

        if (output.task_status === 'SUCCEEDED') {
          if (!output.results || output.results.length === 0) {
            return {
              error: new Error('Task succeeded but no images generated'),
              status: 'failed',
            };
          }

          const generatedImageUrl = output.results[0].url;
          log('Image generated successfully: %s', generatedImageUrl);

          return {
            data: { imageUrl: generatedImageUrl },
            status: 'success',
          };
        }

        if (output.task_status === 'FAILED') {
          // Check both fields so the provider's failure reason is not lost when
          // it arrives in `message` rather than `error_message`.
          const errorMessage =
            output.error_message || output.message || 'Image generation task failed';
          return {
            error: new Error(`Qwen image generation failed: ${errorMessage}`),
            status: 'failed',
          };
        }

        // Continue polling for pending/running status or other unknown statuses
        return { status: 'pending' };
      },
      logger: {
        debug: (message: any, ...args: any[]) => log(message, ...args),
        error: (message: any, ...args: any[]) => log(message, ...args),
      },
      pollingQuery: () => queryTaskStatus(taskId, apiKey),
    });

    return result;
  } catch (error) {
    log('Error in createQwenImage: %O', error);

    throw AgentRuntimeError.createImage({
      error: error as any,
      errorType: 'ProviderBizError',
      provider,
    });
  }
}