/*
 * @bratcliffe909/mcp-server-segmind
 * Version: (not captured in this copy)
 * Model Context Protocol server for the Segmind API - generate images and videos using AI models.
 * 279 lines • 15.8 kB
 * JavaScript
 */
import { z } from 'zod';
import { ModelCategory, OutputType } from './types.js';
/**
 * Catalogue of Segmind model definitions exported by this module.
 *
 * Every entry is a plain object with:
 * - `id`, `name`, `description`: identifier and human-readable text.
 * - `category`, `outputType`: members of `ModelCategory` / `OutputType`.
 * - `endpoint`, `apiVersion`: route path and API version string. How they are
 *   combined into a request URL is decided by the consumer, not here.
 * - `estimatedTime`, `creditsPerUse`: numeric metadata. Units are not defined in
 *   this file (presumably seconds and Segmind credits — TODO confirm).
 * - `parameters`: a zod object schema that validates the request parameters for
 *   the model and fills in the defaults shown.
 * - `supportedFormats`: list of output format names.
 * - `maxDimensions` (image generation entries only): `{ width, height }`.
 *
 * Count-like parameters (sample counts, step counts, seeds, token limits and the
 * upscale factor) are restricted to integers so that values such as
 * `samples: 1.5` are rejected by the schema instead of being forwarded.
 */
export const WORKING_MODELS = [
  {
    id: 'sdxl',
    name: 'Stable Diffusion XL',
    description: 'High-quality image generation with SDXL 1.0',
    category: ModelCategory.TEXT_TO_IMAGE,
    endpoint: '/sdxl1.0-txt2img',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 10,
    creditsPerUse: 0.3,
    parameters: z.object({
      prompt: z.string().describe('Text description of the image to generate'),
      negative_prompt: z.string().optional().describe('What to avoid in the generated image'),
      img_width: z.number().min(256).max(2048).multipleOf(8).default(1024).describe('Image width in pixels (must be multiple of 8)'),
      img_height: z.number().min(256).max(2048).multipleOf(8).default(1024).describe('Image height in pixels (must be multiple of 8)'),
      samples: z.number().int().min(1).max(4).default(1).describe('Number of images to generate'),
      guidance_scale: z.number().min(1).max(20).default(7.5).describe('How closely to follow the prompt (higher = more literal)'),
      num_inference_steps: z.number().int().min(1).max(100).default(25).describe('Number of denoising steps (higher = better quality but slower)'),
      seed: z.number().int().optional().describe('Random seed for reproducible results'),
      scheduler: z.string().optional().default('DDIM').describe('Sampling scheduler algorithm'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg', 'webp'],
    maxDimensions: { width: 2048, height: 2048 },
  },
  {
    id: 'sdxl-lightning',
    name: 'SDXL Lightning',
    description: 'Fast high-quality image generation with SDXL Lightning',
    category: ModelCategory.TEXT_TO_IMAGE,
    endpoint: '/sdxl1.0-newreality-lightning',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 5,
    creditsPerUse: 0.2,
    parameters: z.object({
      prompt: z.string().describe('Text description of the image to generate'),
      negative_prompt: z.string().optional().describe('What to avoid in the generated image'),
      img_width: z.number().min(256).max(2048).multipleOf(8).default(512).describe('Image width in pixels (must be multiple of 8)'),
      img_height: z.number().min(256).max(2048).multipleOf(8).default(512).describe('Image height in pixels (must be multiple of 8)'),
      samples: z.number().int().min(1).max(4).default(1).describe('Number of images to generate'),
      guidance_scale: z.number().min(1).max(20).default(2).describe('How closely to follow the prompt (lower values for Lightning)'),
      num_inference_steps: z.number().int().min(1).max(100).default(8).describe('Number of denoising steps (Lightning uses fewer steps)'),
      seed: z.number().int().optional().describe('Random seed for reproducible results'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg', 'webp'],
    maxDimensions: { width: 2048, height: 2048 },
  },
  {
    id: 'fooocus',
    name: 'Fooocus',
    description: 'Advanced image generation with Fooocus',
    category: ModelCategory.TEXT_TO_IMAGE,
    endpoint: '/fooocus',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 12,
    creditsPerUse: 0.4,
    parameters: z.object({
      prompt: z.string().describe('Text description of the image to generate'),
      negative_prompt: z.string().optional().describe('What to avoid in the generated image'),
      steps: z.number().int().min(20).max(100).default(30).describe('Number of generation steps'),
      samples: z.number().int().min(1).max(4).default(1).describe('Number of images to generate'),
      styles: z.string().optional().default('V2,Enhance,Sharp').describe('Comma-separated style presets to apply'),
      aspect_ratio: z.string().optional().default('1024*1024').describe('Image dimensions format: width*height'),
      seed: z.number().int().optional().default(-1).describe('Random seed (-1 for random)'),
      guidance_scale: z.number().min(1).max(25).default(4).describe('How closely to follow the prompt'),
      scheduler: z.string().optional().default('DPM++ SDE').describe('Sampling scheduler algorithm'),
      base_model: z.string().optional().default('juggernaut_v8').describe('Base model to use for generation'),
    }),
    supportedFormats: ['png', 'jpeg'],
    maxDimensions: { width: 2048, height: 2048 },
  },
  {
    id: 'ssd-1b',
    name: 'SSD-1B',
    description: 'Efficient billion-parameter model for fast image generation',
    category: ModelCategory.TEXT_TO_IMAGE,
    endpoint: '/ssd-1b',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 8,
    creditsPerUse: 0.25,
    parameters: z.object({
      prompt: z.string().describe('Text description of the image to generate'),
      negative_prompt: z.string().optional().describe('What to avoid in the generated image'),
      samples: z.number().int().min(1).max(4).default(1).describe('Number of images to generate'),
      scheduler: z.string().optional().default('DPM2 Karras').describe('Sampling scheduler algorithm'),
      num_inference_steps: z.number().int().min(20).max(100).default(25).describe('Number of denoising steps'),
      guidance_scale: z.number().min(1).max(25).default(7.5).describe('How closely to follow the prompt'),
      seed: z.number().int().optional().default(-1).describe('Random seed (-1 for random)'),
      // Upper bound mirrors this entry's maxDimensions; pixel counts must be positive integers.
      img_width: z.number().int().min(1).max(2048).default(1024).describe('Image width in pixels'),
      img_height: z.number().int().min(1).max(2048).default(1024).describe('Image height in pixels'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg'],
    maxDimensions: { width: 2048, height: 2048 },
  },
  {
    id: 'sd15-img2img',
    name: 'SD 1.5 Image-to-Image',
    description: 'Transform existing images with Stable Diffusion 1.5',
    category: ModelCategory.IMAGE_TO_IMAGE,
    endpoint: '/sd1.5-img2img',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 8,
    creditsPerUse: 0.3,
    parameters: z.object({
      prompt: z.string().describe('Text description of the desired transformation'),
      negative_prompt: z.string().optional().describe('What to avoid in the transformed image'),
      image: z.string().describe('Base64 encoded input image'),
      samples: z.number().int().min(1).max(4).default(1).describe('Number of images to generate'),
      scheduler: z.string().optional().default('DDIM').describe('Sampling scheduler algorithm'),
      num_inference_steps: z.number().int().min(1).max(100).default(20).describe('Number of denoising steps'),
      guidance_scale: z.number().min(1).max(20).default(7.5).describe('How closely to follow the prompt'),
      strength: z.number().min(0).max(1).default(0.7).describe('How much to transform the image (0=no change, 1=complete change)'),
      seed: z.number().int().optional().describe('Random seed for reproducible results'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg'],
    maxDimensions: { width: 1024, height: 1024 },
  },
  {
    id: 'esrgan',
    name: 'ESRGAN',
    description: 'AI-powered image upscaling and enhancement',
    category: ModelCategory.IMAGE_ENHANCEMENT,
    endpoint: '/esrgan',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 5,
    creditsPerUse: 0.2,
    parameters: z.object({
      image: z.string().describe('Base64 encoded image to upscale'),
      // Integer-only so the accepted values match the documented 2x/3x/4x factors.
      scale: z.number().int().min(2).max(4).default(2).describe('Upscaling factor (2x, 3x, or 4x)'),
      face_enhance: z.boolean().optional().default(false).describe('Apply face enhancement during upscaling'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg'],
  },
  {
    id: 'codeformer',
    name: 'CodeFormer',
    description: 'AI face restoration and enhancement',
    category: ModelCategory.IMAGE_ENHANCEMENT,
    endpoint: '/codeformer',
    apiVersion: 'v1',
    outputType: OutputType.IMAGE,
    estimatedTime: 5,
    creditsPerUse: 0.2,
    parameters: z.object({
      image: z.string().describe('Base64 encoded image with faces to restore'),
      fidelity: z.number().min(0).max(1).default(0.5).describe('Balance between quality and identity preservation (0=quality, 1=identity)'),
      base64: z.boolean().optional().default(false).describe('Return image as base64 string instead of binary'),
    }),
    supportedFormats: ['png', 'jpeg'],
  },
  {
    id: 'veo-3',
    name: 'Google Veo 3',
    description: 'Advanced text-to-video generation with realistic audio synthesis for cinematic content (WARNING: Uses 2.0 credits per generation)',
    category: ModelCategory.TEXT_TO_VIDEO,
    endpoint: '/veo-3',
    apiVersion: 'v1',
    outputType: OutputType.VIDEO,
    estimatedTime: 30,
    creditsPerUse: 2.0,
    parameters: z.object({
      prompt: z.string().min(1).max(2000).describe('Detailed description of the video content'),
      seed: z.number().int().optional().default(0).describe('Random seed for consistent outputs'),
      generate_audio: z.boolean().optional().default(true).describe('Enable audio generation for the video'),
      aspect_ratio: z.enum(['16:9', '4:3', '1:1', '3:4', '9:16']).optional().default('16:9').describe('Video aspect ratio'),
    }),
    supportedFormats: ['mp4'],
  },
  {
    id: 'seedance-v1-lite',
    name: 'Seedance V1 Lite',
    description: 'Fast high-quality text-to-video generation with multi-shot capability',
    category: ModelCategory.TEXT_TO_VIDEO,
    endpoint: '/seedance-v1-lite-text-to-video',
    apiVersion: 'v1',
    outputType: OutputType.VIDEO,
    estimatedTime: 20,
    creditsPerUse: 0.45,
    parameters: z.object({
      prompt: z.string().min(1).max(1000).describe('Description of the video scene or animation'),
      duration: z.number().int().min(5).max(10).default(5).describe('Video duration in seconds'),
      aspect_ratio: z.enum(['16:9', '4:3', '1:1', '3:4', '9:16']).default('16:9').describe('Video aspect ratio'),
      resolution: z.enum(['480p', '720p']).default('720p').describe('Video resolution'),
      seed: z.number().int().min(1).max(999999).optional().describe('Random seed for reproducible results'),
    }),
    supportedFormats: ['mp4'],
  },
  {
    id: 'dia-tts',
    name: 'Dia Text-to-Speech',
    description: 'Ultra-realistic multi-speaker dialogue with emotions and nonverbal cues',
    category: ModelCategory.TEXT_TO_AUDIO,
    endpoint: '/dia',
    apiVersion: 'v1',
    outputType: OutputType.AUDIO,
    estimatedTime: 10,
    creditsPerUse: 0.15,
    parameters: z.object({
      text: z.string().min(1).max(5000).describe('Text with [S1], [S2] speaker tags and emotion cues'),
      seed: z.number().int().optional().describe('Random seed for reproducible results'),
      top_p: z.number().min(0.1).max(1).default(0.95).describe('Controls word variety. Higher values allow rarer words'),
      cfg_scale: z.number().min(1).max(5).default(4).describe('Controls how strictly audio follows text. Higher = more accurate, lower = more natural'),
      temperature: z.number().min(0.1).max(2).default(1.3).describe('Controls randomness. Higher = more variety, lower = more consistency'),
      input_audio: z.string().optional().describe('Base64 audio for voice cloning (.wav, .mp3, .flac)'),
      speed_factor: z.number().min(0.5).max(1.5).default(0.94).describe('Playback speed (0.5-1.5). Default 0.94 = normal speech. Lower = slower, higher = faster'),
      max_new_tokens: z.number().int().min(500).max(4096).default(3072).describe('Controls audio length. Higher values = longer audio'),
      cfg_filter_top_k: z.number().int().min(10).max(100).default(35).describe('Filters audio tokens. Higher values = more diverse sounds'),
    }),
    supportedFormats: ['mp3', 'wav'],
  },
  {
    id: 'orpheus-tts',
    name: 'Orpheus TTS 3B',
    description: 'Open-source TTS with emotion tags and natural conversational speech',
    category: ModelCategory.TEXT_TO_AUDIO,
    endpoint: '/orpheus-3b-0.1',
    apiVersion: 'v1',
    outputType: OutputType.AUDIO,
    estimatedTime: 8,
    creditsPerUse: 0.1,
    parameters: z.object({
      text: z.string().min(1).max(2000).describe('Text with emotion tags like <laugh>, <sigh>'),
      voice: z.enum(['tara', 'dan', 'josh', 'emma']).default('dan').describe('Voice character to use'),
      top_p: z.number().min(0.1).max(1).default(0.95).describe('Nucleus sampling probability threshold'),
      temperature: z.number().min(0.1).max(1.5).default(0.6).describe('Sampling temperature for variation'),
      max_new_tokens: z.number().int().min(100).max(2000).default(1200).describe('Maximum tokens to generate'),
      repetition_penalty: z.number().min(1).max(2).default(1.1).describe('Penalty for repeated phrases'),
    }),
    supportedFormats: ['mp3', 'wav'],
  },
  {
    id: 'lyria-2',
    name: 'Lyria 2',
    description: 'High-fidelity 48kHz stereo instrumental music generation from text',
    category: ModelCategory.TEXT_TO_MUSIC,
    endpoint: '/lyria-2',
    apiVersion: 'v1',
    outputType: OutputType.AUDIO,
    estimatedTime: 25,
    creditsPerUse: 0.5,
    parameters: z.object({
      prompt: z.string().min(1).max(500).describe('Description of music theme and mood'),
      negative_prompt: z.string().optional().default('No loud drums, no vocals.').describe('Elements to avoid in the music'),
      seed: z.number().int().optional().describe('Random seed for reproducible results'),
    }),
    supportedFormats: ['mp3', 'wav'],
  },
  {
    id: 'minimax-music',
    name: 'Minimax Music-01',
    description: 'Generate up to 60 seconds of music with accompaniment and vocals',
    category: ModelCategory.TEXT_TO_MUSIC,
    endpoint: '/minimax-music-01',
    apiVersion: 'v1',
    outputType: OutputType.AUDIO,
    estimatedTime: 40,
    creditsPerUse: 0.8,
    parameters: z.object({
      prompt: z.string().min(1).max(1000).describe('Music description or lyrics'),
      reference_audio: z.string().optional().describe('Base64 audio for voice reference'),
      instrumental_reference: z.string().optional().describe('Base64 instrumental reference'),
      duration: z.number().min(10).max(60).default(30).describe('Music duration in seconds'),
    }),
    supportedFormats: ['mp3', 'wav'],
  },
];
//# sourceMappingURL=working-models.js.map