UNPKG

@bratcliffe909/mcp-server-segmind

Version:

Model Context Protocol server for Segmind API - Generate images and videos using AI models

279 lines 15.8 kB
import { z } from 'zod';
import { ModelCategory, OutputType } from './types.js';

/**
 * Static catalog of the model descriptors exposed by this module.
 *
 * Every entry carries:
 *  - `id`, `name`, `description`: identifier and human-readable labels.
 *  - `category`: a `ModelCategory` member.
 *  - `endpoint`, `apiVersion`: path fragment and API version string for the model.
 *  - `outputType`: an `OutputType` member (IMAGE, VIDEO or AUDIO).
 *  - `estimatedTime`: numeric estimate (presumably seconds; confirm against consumers).
 *  - `creditsPerUse`: numeric credit cost of one generation.
 *  - `parameters`: a zod object schema describing the accepted request parameters,
 *    including bounds, defaults and per-field descriptions.
 *  - `supportedFormats`: list of output format names.
 *  - `maxDimensions` (image generation entries only): maximum width/height.
 *
 * The key order inside each `z.object({...})` is kept as-is because it determines
 * the order of the schema's shape.
 *
 * NOTE(review): `samples`, step counts and several `seed` fields are declared with
 * `z.number()` without `.int()`, so fractional values pass validation. Confirm
 * whether the upstream API tolerates that before tightening.
 */
export const WORKING_MODELS = [
  // ---------------------------------------------------------------------
  // Text-to-image
  // ---------------------------------------------------------------------
  {
    id: 'sdxl',
    name: 'Stable Diffusion XL',
    description: 'High-quality image generation with SDXL 1.0',
    category: ModelCategory.TEXT_TO_IMAGE,
    endpoint: '/sdxl1.0-txt2img',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 10,
    creditsPerUse: 0.3,
    parameters: z.object({
      prompt: z.string().describe('Text description of the image to generate'),
      negative_prompt: z.string().optional().describe('What to avoid in the generated image'),
      img_width: z.number().min(256).max(2048).multipleOf(8).default(1024).describe('Image width in pixels (must be multiple of 8)'),
      img_height: z.number().min(256).max(2048).multipleOf(8).default(1024).describe('Image height in pixels (must be multiple of 8)'),
      samples: z.number().min(1).max(4).default(1).describe('Number of images to generate'),
      guidance_scale: z.number().min(1).max(20).default(7.5).describe('How closely to follow the prompt (higher = more literal)'),
      num_inference_steps: z.number().min(1).max(100).default(25).describe('Number of denoising steps (higher = better quality but slower)'),
      seed: z.number().optional().describe('Random seed for reproducible results'),
      scheduler: z.string().optional().default('DDIM').describe('Sampling scheduler algorithm'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg', 'webp'],
    maxDimensions: { width: 2048, height: 2048 }
  },
  {
    id: 'sdxl-lightning',
    name: 'SDXL Lightning',
    description: 'Fast high-quality image generation with SDXL Lightning',
    category: ModelCategory.TEXT_TO_IMAGE,
    endpoint: '/sdxl1.0-newreality-lightning',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 5,
    creditsPerUse: 0.2,
    parameters: z.object({
      prompt: z.string().describe('Text description of the image to generate'),
      negative_prompt: z.string().optional().describe('What to avoid in the generated image'),
      img_width: z.number().min(256).max(2048).multipleOf(8).default(512).describe('Image width in pixels (must be multiple of 8)'),
      img_height: z.number().min(256).max(2048).multipleOf(8).default(512).describe('Image height in pixels (must be multiple of 8)'),
      samples: z.number().min(1).max(4).default(1).describe('Number of images to generate'),
      guidance_scale: z.number().min(1).max(20).default(2).describe('How closely to follow the prompt (lower values for Lightning)'),
      num_inference_steps: z.number().min(1).max(100).default(8).describe('Number of denoising steps (Lightning uses fewer steps)'),
      seed: z.number().optional().describe('Random seed for reproducible results'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg', 'webp'],
    maxDimensions: { width: 2048, height: 2048 }
  },
  {
    id: 'fooocus',
    name: 'Fooocus',
    description: 'Advanced image generation with Fooocus',
    category: ModelCategory.TEXT_TO_IMAGE,
    endpoint: '/fooocus',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 12,
    creditsPerUse: 0.4,
    parameters: z.object({
      prompt: z.string().describe('Text description of the image to generate'),
      negative_prompt: z.string().optional().describe('What to avoid in the generated image'),
      steps: z.number().min(20).max(100).default(30).describe('Number of generation steps'),
      samples: z.number().min(1).max(4).default(1).describe('Number of images to generate'),
      styles: z.string().optional().default('V2,Enhance,Sharp').describe('Comma-separated style presets to apply'),
      // Dimensions are passed as a single "width*height" string for this model.
      aspect_ratio: z.string().optional().default('1024*1024').describe('Image dimensions format: width*height'),
      seed: z.number().optional().default(-1).describe('Random seed (-1 for random)'),
      guidance_scale: z.number().min(1).max(25).default(4).describe('How closely to follow the prompt'),
      scheduler: z.string().optional().default('DPM++ SDE').describe('Sampling scheduler algorithm'),
      base_model: z.string().optional().default('juggernaut_v8').describe('Base model to use for generation'),
    }),
    supportedFormats: ['png', 'jpeg'],
    maxDimensions: { width: 2048, height: 2048 }
  },
  {
    id: 'ssd-1b',
    name: 'SSD-1B',
    description: 'Efficient billion-parameter model for fast image generation',
    category: ModelCategory.TEXT_TO_IMAGE,
    endpoint: '/ssd-1b',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 8,
    creditsPerUse: 0.25,
    parameters: z.object({
      prompt: z.string().describe('Text description of the image to generate'),
      negative_prompt: z.string().optional().describe('What to avoid in the generated image'),
      samples: z.number().min(1).max(4).default(1).describe('Number of images to generate'),
      scheduler: z.string().optional().default('DPM2 Karras').describe('Sampling scheduler algorithm'),
      num_inference_steps: z.number().min(20).max(100).default(25).describe('Number of denoising steps'),
      guidance_scale: z.number().min(1).max(25).default(7.5).describe('How closely to follow the prompt'),
      seed: z.number().optional().default(-1).describe('Random seed (-1 for random)'),
      // NOTE(review): unlike the SDXL entries, width/height carry no min/max/multipleOf
      // bounds here even though maxDimensions below is 2048x2048 — confirm intent.
      img_width: z.number().default(1024).describe('Image width in pixels'),
      img_height: z.number().default(1024).describe('Image height in pixels'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg'],
    maxDimensions: { width: 2048, height: 2048 }
  },

  // ---------------------------------------------------------------------
  // Image-to-image
  // ---------------------------------------------------------------------
  {
    id: 'sd15-img2img',
    name: 'SD 1.5 Image-to-Image',
    description: 'Transform existing images with Stable Diffusion 1.5',
    category: ModelCategory.IMAGE_TO_IMAGE,
    endpoint: '/sd1.5-img2img',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 8,
    creditsPerUse: 0.3,
    parameters: z.object({
      prompt: z.string().describe('Text description of the desired transformation'),
      negative_prompt: z.string().optional().describe('What to avoid in the transformed image'),
      image: z.string().describe('Base64 encoded input image'),
      samples: z.number().min(1).max(4).default(1).describe('Number of images to generate'),
      scheduler: z.string().optional().default('DDIM').describe('Sampling scheduler algorithm'),
      num_inference_steps: z.number().min(1).max(100).default(20).describe('Number of denoising steps'),
      guidance_scale: z.number().min(1).max(20).default(7.5).describe('How closely to follow the prompt'),
      strength: z.number().min(0).max(1).default(0.7).describe('How much to transform the image (0=no change, 1=complete change)'),
      seed: z.number().optional().describe('Random seed for reproducible results'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg'],
    maxDimensions: { width: 1024, height: 1024 }
  },

  // ---------------------------------------------------------------------
  // Image enhancement (no maxDimensions declared for these entries)
  // ---------------------------------------------------------------------
  {
    id: 'esrgan',
    name: 'ESRGAN',
    description: 'AI-powered image upscaling and enhancement',
    category: ModelCategory.IMAGE_ENHANCEMENT,
    endpoint: '/esrgan',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 5,
    creditsPerUse: 0.2,
    parameters: z.object({
      image: z.string().describe('Base64 encoded image to upscale'),
      // NOTE(review): description lists 2x/3x/4x but the schema also accepts
      // fractional values between 2 and 4 — confirm whether that is intended.
      scale: z.number().min(2).max(4).default(2).describe('Upscaling factor (2x, 3x, or 4x)'),
      face_enhance: z.boolean().optional().default(false).describe('Apply face enhancement during upscaling'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg'],
  },
  {
    id: 'codeformer',
    name: 'CodeFormer',
    description: 'AI face restoration and enhancement',
    category: ModelCategory.IMAGE_ENHANCEMENT,
    endpoint: '/codeformer',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 5,
    creditsPerUse: 0.2,
    parameters: z.object({
      image: z.string().describe('Base64 encoded image with faces to restore'),
      fidelity: z.number().min(0).max(1).default(0.5).describe('Balance between quality and identity preservation (0=quality, 1=identity)'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg'],
  },

  // ---------------------------------------------------------------------
  // Text-to-video
  // ---------------------------------------------------------------------
  {
    id: 'veo-3',
    name: 'Google Veo 3',
    description: 'Advanced text-to-video generation with realistic audio synthesis for cinematic content (WARNING: Uses 2.0 credits per generation)',
    category: ModelCategory.TEXT_TO_VIDEO,
    endpoint: '/veo-3',
    apiVersion: 'v1',
    outputType: OutputType.VIDEO,
    estimatedTime: 30,
    // Highest cost in this catalog; the description string above repeats the figure.
    creditsPerUse: 2.0,
    parameters: z.object({
      prompt: z.string().min(1).max(2000).describe('Detailed description of the video content'),
      seed: z.number().int().optional().default(0).describe('Random seed for consistent outputs'),
      generate_audio: z.boolean().optional().default(true).describe('Enable audio generation for the video'),
      aspect_ratio: z.enum(['16:9', '4:3', '1:1', '3:4', '9:16']).optional().default('16:9').describe('Video aspect ratio'),
    }),
    supportedFormats: ['mp4'],
  },
  {
    id: 'seedance-v1-lite',
    name: 'Seedance V1 Lite',
    description: 'Fast high-quality text-to-video generation with multi-shot capability',
    category: ModelCategory.TEXT_TO_VIDEO,
    endpoint: '/seedance-v1-lite-text-to-video',
    apiVersion: 'v1',
    outputType: OutputType.VIDEO,
    estimatedTime: 20,
    creditsPerUse: 0.45,
    parameters: z.object({
      prompt: z.string().min(1).max(1000).describe('Description of the video scene or animation'),
      duration: z.number().int().min(5).max(10).default(5).describe('Video duration in seconds'),
      aspect_ratio: z.enum(['16:9', '4:3', '1:1', '3:4', '9:16']).default('16:9').describe('Video aspect ratio'),
      resolution: z.enum(['480p', '720p']).default('720p').describe('Video resolution'),
      seed: z.number().int().min(1).max(999999).optional().describe('Random seed for reproducible results'),
    }),
    supportedFormats: ['mp4'],
  },

  // ---------------------------------------------------------------------
  // Text-to-audio (speech)
  // ---------------------------------------------------------------------
  {
    id: 'dia-tts',
    name: 'Dia Text-to-Speech',
    description: 'Ultra-realistic multi-speaker dialogue with emotions and nonverbal cues',
    category: ModelCategory.TEXT_TO_AUDIO,
    endpoint: '/dia',
    apiVersion: 'v1',
    outputType: OutputType.AUDIO,
    estimatedTime: 10,
    creditsPerUse: 0.15,
    parameters: z.object({
      text: z.string().min(1).max(5000).describe('Text with [S1], [S2] speaker tags and emotion cues'),
      seed: z.number().int().optional().describe('Random seed for reproducible results'),
      top_p: z.number().min(0.1).max(1).default(0.95).describe('Controls word variety. Higher values allow rarer words'),
      cfg_scale: z.number().min(1).max(5).default(4).describe('Controls how strictly audio follows text. Higher = more accurate, lower = more natural'),
      temperature: z.number().min(0.1).max(2).default(1.3).describe('Controls randomness. Higher = more variety, lower = more consistency'),
      input_audio: z.string().optional().describe('Base64 audio for voice cloning (.wav, .mp3, .flac)'),
      speed_factor: z.number().min(0.5).max(1.5).default(0.94).describe('Playback speed (0.5-1.5). Default 0.94 = normal speech. Lower = slower, higher = faster'),
      max_new_tokens: z.number().min(500).max(4096).default(3072).describe('Controls audio length. Higher values = longer audio'),
      cfg_filter_top_k: z.number().min(10).max(100).default(35).describe('Filters audio tokens. Higher values = more diverse sounds'),
    }),
    supportedFormats: ['mp3', 'wav'],
  },
  {
    id: 'orpheus-tts',
    name: 'Orpheus TTS 3B',
    description: 'Open-source TTS with emotion tags and natural conversational speech',
    category: ModelCategory.TEXT_TO_AUDIO,
    endpoint: '/orpheus-3b-0.1',
    apiVersion: 'v1',
    outputType: OutputType.AUDIO,
    estimatedTime: 8,
    creditsPerUse: 0.1,
    parameters: z.object({
      text: z.string().min(1).max(2000).describe('Text with emotion tags like <laugh>, <sigh>'),
      voice: z.enum(['tara', 'dan', 'josh', 'emma']).default('dan').describe('Voice character to use'),
      top_p: z.number().min(0.1).max(1).default(0.95).describe('Nucleus sampling probability threshold'),
      temperature: z.number().min(0.1).max(1.5).default(0.6).describe('Sampling temperature for variation'),
      max_new_tokens: z.number().min(100).max(2000).default(1200).describe('Maximum tokens to generate'),
      repetition_penalty: z.number().min(1).max(2).default(1.1).describe('Penalty for repeated phrases'),
    }),
    supportedFormats: ['mp3', 'wav'],
  },

  // ---------------------------------------------------------------------
  // Text-to-music
  // ---------------------------------------------------------------------
  {
    id: 'lyria-2',
    name: 'Lyria 2',
    description: 'High-fidelity 48kHz stereo instrumental music generation from text',
    category: ModelCategory.TEXT_TO_MUSIC,
    endpoint: '/lyria-2',
    apiVersion: 'v1',
    outputType: OutputType.AUDIO,
    estimatedTime: 25,
    creditsPerUse: 0.5,
    parameters: z.object({
      prompt: z.string().min(1).max(500).describe('Description of music theme and mood'),
      negative_prompt: z.string().optional().default('No loud drums, no vocals.').describe('Elements to avoid in the music'),
      seed: z.number().int().optional().describe('Random seed for reproducible results'),
    }),
    supportedFormats: ['mp3', 'wav'],
  },
  {
    id: 'minimax-music',
    name: 'Minimax Music-01',
    description: 'Generate up to 60 seconds of music with accompaniment and vocals',
    category: ModelCategory.TEXT_TO_MUSIC,
    endpoint: '/minimax-music-01',
    apiVersion: 'v1',
    outputType: OutputType.AUDIO,
    estimatedTime: 40,
    creditsPerUse: 0.8,
    parameters: z.object({
      prompt: z.string().min(1).max(1000).describe('Music description or lyrics'),
      reference_audio: z.string().optional().describe('Base64 audio for voice reference'),
      instrumental_reference: z.string().optional().describe('Base64 instrumental reference'),
      duration: z.number().min(10).max(60).default(30).describe('Music duration in seconds'),
    }),
    supportedFormats: ['mp3', 'wav'],
  },
];
//# sourceMappingURL=working-models.js.map