// @dfeirstein/image-server
// Version:
// MCP server for creating, editing, and combining images with OpenAI models
// 532 lines (502 loc) • 27.6 kB
// JavaScript
import fs from 'node:fs';
import { createRequire } from 'node:module';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, ListResourcesRequestSchema, ReadResourceRequestSchema, McpError, } from '@modelcontextprotocol/sdk/types.js';
import { config } from 'dotenv';
import { toFile } from 'openai';
import { z } from 'zod'; // Keep zod for input validation
import { zodToJsonSchema } from 'zod-to-json-schema';
import { imageReqSchema } from './types/image.js';
import { getOpenAIClient } from './lib/openaiClient.js'; // Import the function
import { buildPrompt } from './lib/promptBuilder.js';
import { save } from './lib/fileSaver.js';
console.error("[MCP DEBUG] Server script started."); // Log start
config(); // Load .env file if present (though MCP client injects it)
console.error("[MCP DEBUG] dotenv config loaded."); // Log dotenv
// --- Tool Definition ---
// Generate the full JSON schema including definitions
const fullJsonSchema = zodToJsonSchema(imageReqSchema, "imageReqSchema");
// Extract the core schema definition that Cline expects (with top-level "type": "object")
const inputSchemaForCline = fullJsonSchema.definitions?.imageReqSchema ?? { type: 'object' }; // Fallback just in case
const CREATE_IMAGE_TOOL = {
name: 'create_image',
description: 'Generates an image using OpenAI (DALL-E 3 / gpt-image-1) based on a detailed text prompt. For best results, provide vivid descriptions incorporating style, composition, lighting, and mood. Can also edit or combine existing images by providing referenceImagePaths. Refer to \'docs/prompt-recipes.md\' for extensive examples, templates, and tips for various image types (hero backgrounds, icons, illustrations, photos). Key parameters include \'prompt\', \'brandSignature\' (use project palette), \'size\' (e.g., 1024x1024, 1536x1024), \'quality\', \'model\', \'filename\', \'outputPath\', \'targetProjectDir\', and \'referenceImagePaths\' (for editing/combining images).',
// Use the extracted schema object
inputSchema: inputSchemaForCline, // Cast to any to satisfy SDK type
};
// --- Server Class ---
class ImageMcpServer {
server;
// ES module equivalent of __dirname needed for path resolution
__filename = fileURLToPath(import.meta.url);
__dirname = path.dirname(this.__filename);
constructor() {
console.error("[MCP DEBUG] ImageMcpServer constructor entered."); // Log constructor start
// Determine resource paths by checking multiple possible locations
const findResource = (relativePath) => {
console.error(`[MCP DEBUG] Looking for resource: ${relativePath}`);
console.error(`[MCP DEBUG] Working directory: ${process.cwd()}`);
console.error(`[MCP DEBUG] Script directory: ${this.__dirname}`);
try {
// Get the module path (where the npm package is installed)
const modulePath = path.dirname(require.resolve('@dfeirstein/image-server/package.json', { paths: [process.cwd(), this.__dirname] }));
console.error(`[MCP DEBUG] Module path: ${modulePath}`);
// Possible paths (from most to least likely)
const possiblePaths = [
path.resolve(modulePath, relativePath), // Direct from npm module root
path.resolve(this.__dirname, '..', relativePath), // Path from dist/ to root
path.resolve(this.__dirname, '../..', relativePath), // Path from src/ to root
path.resolve(process.cwd(), relativePath), // Current working directory
// NPX specific paths
path.resolve(process.env.npm_config_local_prefix || '', 'node_modules/@dfeirstein/image-server', relativePath),
];
console.error('[MCP DEBUG] Checking possible paths:');
for (const p of possiblePaths) {
console.error(`[MCP DEBUG] - Checking: ${p}`);
if (fs.existsSync(p)) {
console.error(`[MCP DEBUG] Found resource at: ${p}`);
// Read the first few lines to verify it's the right file
try {
const content = fs.readFileSync(p, 'utf8').slice(0, 200);
console.error(`[MCP DEBUG] First 200 chars: ${content.replace(/\n/g, '\\n')}`);
}
catch (err) {
console.error(`[MCP DEBUG] Could not read file: ${err.message}`);
}
return p;
}
}
}
catch (err) {
console.error(`[MCP DEBUG] Error resolving paths: ${err.message}`);
}
console.error(`[MCP WARNING] Could not find resource: ${relativePath}`);
// Instead of returning a non-existent path, use embedded content
if (relativePath === 'docs/prompt-recipes.md') {
const embeddedPath = path.join(this.__dirname, 'embedded-prompt-recipes.md');
// Create an embedded copy of the file
try {
const content = `# Prompt Recipes for Image Generation
This is embedded documentation for the image-mcp-server. The server couldn't find the prompt-recipes.md file, so it's providing this embedded version instead.
## Basic Prompt Structure
A good prompt should include:
1. Subject description (what/who)
2. Style details (photorealistic, cartoon, etc.)
3. Lighting and mood
4. Technical specifications (if needed)
## Examples
### Icon Design
"A minimalist cloud icon with subtle gradient, clean lines, professional tech style, light blue color scheme"
`;
fs.writeFileSync(embeddedPath, content);
console.error(`[MCP DEBUG] Created embedded resource at: ${embeddedPath}`);
return embeddedPath;
}
catch (err) {
console.error(`[MCP DEBUG] Failed to create embedded resource: ${err.message}`);
}
}
return null; // Return null to indicate resource not found
};
// Create resources object with only valid resources
const resources = {};
// Find and add prompt recipes
const promptRecipesPath = findResource('docs/prompt-recipes.md');
if (promptRecipesPath) {
console.error(`[MCP DEBUG] Adding prompt-recipes resource at path: ${promptRecipesPath}`);
resources["docs/prompt-recipes"] = {
type: "text/markdown",
path: promptRecipesPath
};
}
else {
// Create embedded resource for prompt recipes
const embeddedPath = path.join(this.__dirname, 'embedded-prompt-recipes.md');
try {
// Create more comprehensive embedded content - this is important to actually save the file
const content = `# Prompt Recipes for Image Generation
This is embedded documentation for the image-mcp-server. The server couldn't find the prompt-recipes.md file, so it's providing this embedded version instead.
## Basic Prompt Structure
A good prompt should include:
1. Subject description (what/who)
2. Style details (photorealistic, cartoon, etc.)
3. Lighting and mood
4. Technical specifications (if needed)
## Examples
### Icon Design
"A minimalist cloud icon with subtle gradient, clean lines, professional tech style, light blue color scheme"
### Photorealistic Portrait
"Professional headshot of a middle-aged business executive, neutral expression, studio lighting, high-end DSLR quality, shallow depth of field"
### Background/Hero Image
"Abstract technology background with blue and purple gradient, subtle digital patterns, modern and clean design, suitable for header/hero section"
### Product Visualization
"3D render of a sleek smartphone on a minimalist surface, dramatic lighting from top-right, professional product photography style"
`;
fs.writeFileSync(embeddedPath, content);
console.error(`[MCP DEBUG] Created and added embedded prompt-recipes resource at: ${embeddedPath}`);
resources["docs/prompt-recipes"] = {
type: "text/markdown",
path: embeddedPath
};
}
catch (err) {
console.error(`[MCP ERROR] Failed to create embedded prompt-recipes: ${err.message}`);
}
}
// Find and add readme
const readmePath = findResource('README.md');
if (readmePath) {
console.error(`[MCP DEBUG] Adding readme resource at path: ${readmePath}`);
resources["docs/readme"] = {
type: "text/markdown",
path: readmePath
};
}
else {
// Create embedded resource for readme
const embeddedPath = path.join(this.__dirname, 'embedded-readme.md');
try {
const content = `# Image MCP Server
This MCP server provides image generation capabilities using OpenAI's API.
## Usage
The server provides the \`create_image\` tool for generating images from text prompts.
Key parameters:
- prompt: Text description of the desired image
- model: OpenAI model to use (gpt-image-1, dall-e-3, dall-e-2)
- size: Image dimensions (1024x1024, 1024x1536, 1536x1024)
- quality: Image quality (low, medium, high)
- background: Type of background (transparent, opaque)
`;
fs.writeFileSync(embeddedPath, content);
console.error(`[MCP DEBUG] Created and added embedded readme resource at: ${embeddedPath}`);
resources["docs/readme"] = {
type: "text/markdown",
path: embeddedPath
};
}
catch (err) {
console.error(`[MCP ERROR] Failed to create embedded readme: ${err.message}`);
}
}
// Show summary of resources being registered
console.error(`[MCP DEBUG] Registering ${Object.keys(resources).length} resources:`);
for (const [key, resource] of Object.entries(resources)) {
console.error(`[MCP DEBUG] - ${key}: ${resource.path} (${resource.type})`);
// Verify file exists and is readable
try {
const stats = fs.statSync(resource.path);
console.error(`[MCP DEBUG] File exists: ${stats.isFile()}, size: ${stats.size} bytes`);
// Read the first 100 characters to verify content
const content = fs.readFileSync(resource.path, 'utf8').slice(0, 100);
console.error(`[MCP DEBUG] Content preview: ${content.replace(/\n/g, ' ')}`);
}
catch (err) {
console.error(`[MCP ERROR] Error checking resource file: ${err.message}`);
}
}
// Create and store embedded docs for direct access
this.embeddedDocs = new Map();
// Create embedded prompt recipes if needed
if (!promptRecipesPath) {
const content = `# Prompt Recipes for Image Generation
This is embedded documentation for the image-mcp-server. The server couldn't find the prompt-recipes.md file, so it's providing this embedded version instead.
## Basic Prompt Structure
A good prompt should include:
1. Subject description (what/who)
2. Style details (photorealistic, cartoon, etc.)
3. Lighting and mood
4. Technical specifications (if needed)
## Examples
### Icon Design
"A minimalist cloud icon with subtle gradient, clean lines, professional tech style, light blue color scheme"
### Photorealistic Portrait
"Professional headshot of a middle-aged business executive, neutral expression, studio lighting, high-end DSLR quality, shallow depth of field"
### Background/Hero Image
"Abstract technology background with blue and purple gradient, subtle digital patterns, modern and clean design, suitable for header/hero section"
### Product Visualization
"3D render of a sleek smartphone on a minimalist surface, dramatic lighting from top-right, professional product photography style"`;
this.embeddedDocs.set("docs/prompt-recipes", content);
console.error(`[MCP DEBUG] Created embedded prompt recipes content`);
}
else {
try {
const content = fs.readFileSync(promptRecipesPath, 'utf8');
this.embeddedDocs.set("docs/prompt-recipes", content);
console.error(`[MCP DEBUG] Loaded prompt recipes from file: ${promptRecipesPath}`);
}
catch (err) {
console.error(`[MCP ERROR] Failed to read prompt recipes file: ${err.message}`);
}
}
// Create embedded readme if needed
if (!readmePath) {
const content = `# Image MCP Server
This MCP server provides image generation capabilities using OpenAI's API.
## Usage
The server provides the \`create_image\` tool for generating images from text prompts.
Key parameters:
- prompt: Text description of the desired image
- model: OpenAI model to use (gpt-image-1, dall-e-3, dall-e-2)
- size: Image dimensions (1024x1024, 1024x1536, 1536x1024)
- quality: Image quality (low, medium, high)
- background: Type of background (transparent, opaque)`;
this.embeddedDocs.set("docs/readme", content);
console.error(`[MCP DEBUG] Created embedded readme content`);
}
else {
try {
const content = fs.readFileSync(readmePath, 'utf8');
this.embeddedDocs.set("docs/readme", content);
console.error(`[MCP DEBUG] Loaded readme from file: ${readmePath}`);
}
catch (err) {
console.error(`[MCP ERROR] Failed to read readme file: ${err.message}`);
}
}
// Initialize server with empty resources - we'll handle them directly
this.server = new Server({
// Use package name from package.json
name: '@dfeirstein/image-server',
version: '1.0.5', // Match package.json
}, {
capabilities: {
tools: { [CREATE_IMAGE_TOOL.name]: CREATE_IMAGE_TOOL },
resources: {}, // Empty resources - we'll handle them with request handlers
},
});
console.error("[MCP DEBUG] MCP SDK Server instance created."); // Log SDK server creation
// Set up resource handlers that work more reliably
this.setupResourceHandlers();
this.setupToolHandlers();
// Basic error logging
this.server.onerror = (error) => console.error('[MCP Error]', error); // Explicit any type
process.on('SIGINT', async () => {
await this.server.close();
process.exit(0);
});
}
embeddedDocs;
setupResourceHandlers() {
console.error("[MCP DEBUG] Setting up resource handlers");
// Handle resource listing
this.server.setRequestHandler(ListResourcesRequestSchema, async () => {
console.error("[MCP DEBUG] Handling ListResourcesRequest");
return {
resources: [
{
uri: "docs/prompt-recipes",
name: "Prompt Recipes for Image Generation",
mimeType: "text/markdown",
},
{
uri: "docs/readme",
name: "Image MCP Server Documentation",
mimeType: "text/markdown",
}
]
};
});
// Handle resource reading
this.server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
const uri = request.params.uri.toString();
console.error(`[MCP DEBUG] Handling ReadResourceRequest for ${uri}`);
if (this.embeddedDocs.has(uri)) {
const content = this.embeddedDocs.get(uri);
console.error(`[MCP DEBUG] Returning content for ${uri} (${content?.length} bytes)`);
return {
contents: [{
uri,
mimeType: "text/markdown",
text: content,
}]
};
}
console.error(`[MCP ERROR] Resource not found: ${uri}`);
throw new Error(`Resource not found: ${uri}`);
});
console.error("[MCP DEBUG] Resource handlers set up successfully");
}
setupToolHandlers() {
console.error("[MCP DEBUG] setupToolHandlers entered."); // Log handler setup start
// Handler for listing available tools
this.server.setRequestHandler(ListToolsRequestSchema, async () => {
console.error("[MCP DEBUG] ListToolsRequest received. Returning tools:", JSON.stringify([CREATE_IMAGE_TOOL])); // Log before returning tools
return {
tools: [CREATE_IMAGE_TOOL],
};
});
console.error("[MCP DEBUG] ListTools handler set."); // Log handler set
// Handler for executing the tool call
this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
if (request.params.name !== CREATE_IMAGE_TOOL.name) {
throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}`);
}
let input;
try {
// Log incoming request arguments for debugging
console.error("[MCP DEBUG] Incoming request arguments:", JSON.stringify(request.params.arguments, null, 2));
// Validate input arguments using the Zod schema
input = imageReqSchema.parse(request.params.arguments);
}
catch (err) {
if (err instanceof z.ZodError) {
// Throw specific MCP error for invalid parameters
throw new McpError(ErrorCode.InvalidParams, `Invalid input arguments: ${err.errors.map(e => `${e.path.join('.')}: ${e.message}`).join(', ')}`, err.errors // Optionally include Zod error details
);
}
// Re-throw other parsing errors
throw err;
}
// --- Core Image Generation Logic (adapted from Express route) ---
try {
// Create the final prompt using only what was provided in the input
// No fallbacks for brandSignature - use exactly what was provided (or undefined)
const finalPrompt = buildPrompt(input.prompt, input.brandSignature, input.styleDefinitionJSON);
// Log the complete prompt for debugging
console.error("[MCP DEBUG] Final prompt being sent to OpenAI:");
console.error("---PROMPT START---");
console.error(finalPrompt);
console.error("---PROMPT END---");
// Log style JSON if provided (for debugging)
if (input.styleDefinitionJSON) {
console.error("[MCP DEBUG] Style definition JSON provided:", JSON.stringify(input.styleDefinitionJSON, null, 2));
}
// Get OpenAI client instance when needed
const openai = getOpenAIClient();
let rsp;
// Check if reference images are provided
if (input.referenceImagePaths && input.referenceImagePaths.length > 0) {
console.error("[MCP DEBUG] Using images.edit with reference images:", input.referenceImagePaths);
// Determine base directory for reference images
const baseDir = input.targetProjectDir ?? process.cwd();
const publicDir = path.resolve(baseDir, 'public');
// Load reference images
const images = await Promise.all(input.referenceImagePaths.map(async (imagePath) => {
// Resolve full path to the image (relative to public directory)
const fullImagePath = path.resolve(publicDir, imagePath);
console.error(`[MCP DEBUG] Loading reference image from ${fullImagePath}`);
// Determine MIME type based on file extension
const ext = path.extname(fullImagePath).toLowerCase();
const mimeType = ext === '.png' ? 'image/png' :
ext === '.jpg' || ext === '.jpeg' ? 'image/jpeg' :
ext === '.webp' ? 'image/webp' : 'image/png'; // Default to PNG
try {
// Create a readable stream for the image file
const stream = fs.createReadStream(fullImagePath);
// Convert to OpenAI compatible format
return await toFile(stream, path.basename(fullImagePath), { type: mimeType });
}
catch (err) {
console.error(`[MCP ERROR] Failed to load reference image ${fullImagePath}:`, err);
throw new Error(`Failed to load reference image ${imagePath}: ${err.message || 'Unknown error'}`);
}
}));
// Ensure we'll use gpt-image-1 for edit operations
const editModel = 'gpt-image-1';
if (input.model && input.model !== editModel) {
console.error(`[MCP DEBUG] Overriding model ${input.model} with ${editModel} for image edit operation`);
}
// Call OpenAI images.edit API
rsp = await openai.images.edit({
model: editModel, // Always use gpt-image-1 for edits
image: images, // Pass the loaded reference images
prompt: finalPrompt,
background: input.background, // Directly include background parameter
n: 1,
// size is not supported in edit operations
// response_format defaults to b64_json
}); // Type assertion to bypass TypeScript checking
console.error("[MCP DEBUG] Successfully completed images.edit call");
}
else {
console.error("[MCP DEBUG] Using images.generate (no reference images)");
// Call OpenAI images.generate API (existing functionality)
rsp = await openai.images.generate({
model: input.model ?? 'dall-e-3', // Keep dall-e-3 fallback
prompt: finalPrompt,
size: input.size,
quality: input.quality, // Pass validated quality directly
background: input.background, // Directly include background parameter
n: 1,
// response_format defaults to b64_json for Node client
});
console.error("[MCP DEBUG] Successfully completed images.generate call");
}
// Validate response and get image data
if (!rsp.data?.[0]?.b64_json) {
throw new Error('Invalid or missing image data in OpenAI API response');
}
const imgBytes = Buffer.from(rsp.data[0].b64_json, 'base64');
// Save the file
const filename = input.filename ?? `img_${Date.now()}.png`;
const outputPath = input.outputPath ?? '';
// Determine base directory: use targetProjectDir if provided, otherwise use server's CWD as fallback
const baseDir = input.targetProjectDir ?? process.cwd();
// Save relative to target project's public folder
const saveDir = path.resolve(baseDir, 'public', outputPath);
// NOTE: Removed debug log here compared to installed version
const fullPath = await save(imgBytes, filename, saveDir); // Pass absolute save directory
// Calculate path relative to the *target* public directory for the response
const publicDir = path.resolve(baseDir, 'public');
const relativePath = path.relative(publicDir, fullPath);
// --- Format Success Response for MCP ---
const resultData = {
ok: true,
path: relativePath,
bytes: imgBytes.length,
model: input.referenceImagePaths && input.referenceImagePaths.length > 0 ? 'gpt-image-1' : (input.model ?? 'dall-e-3'),
prompt: finalPrompt,
revised_prompt: rsp.data[0].revised_prompt,
operation: input.referenceImagePaths && input.referenceImagePaths.length > 0 ? 'edit' : 'generate',
referenceImages: input.referenceImagePaths || []
};
return {
content: [
{
type: 'text',
text: JSON.stringify(resultData, null, 2), // Return result as JSON string
},
],
};
}
catch (err) {
console.error('❌ createImage tool error:', err); // Log the internal error
// --- Format Error Response for MCP ---
// Use McpError for structured errors, otherwise return a generic error message
if (err instanceof McpError) {
throw err; // Re-throw MCP errors
}
// For OpenAI API errors or other internal errors
return {
content: [
{
type: 'text',
text: `Failed to create image: ${err.message || 'Internal server error'}`,
},
],
isError: true,
};
}
});
}
// Start the server and connect transport
async run() {
console.error("[MCP DEBUG] run() method entered."); // Log run start
const transport = new StdioServerTransport();
console.error("[MCP DEBUG] StdioServerTransport created."); // Log transport creation
await this.server.connect(transport);
console.error("[MCP DEBUG] Server connected to transport. Waiting for requests..."); // Log connection success
// No console.log here for stdio transport
}
}
// Instantiate and run the server
console.error("[MCP DEBUG] Instantiating ImageMcpServer..."); // Log instantiation
const serverInstance = new ImageMcpServer();
console.error("[MCP DEBUG] ImageMcpServer instantiated. Calling run()..."); // Log before run
serverInstance.run().catch(error => {
console.error("Fatal error running Image MCP server:", error);
process.exit(1);
});