UNPKG

screen-view-mcp

Version:

MCP tool for capturing screenshots and analyzing them with Claude Vision API

148 lines (147 loc) 5.71 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.analyzeImage = analyzeImage;
exports.analyzeScreenContent = analyzeScreenContent;
const path_1 = __importDefault(require("path"));
const sdk_1 = require("@anthropic-ai/sdk");
const screenshot_1 = require("./screenshot");

/** Matches an optional `data:image/<type>;base64,` prefix in front of the payload. */
const DATA_URL_PREFIX = /^data:image\/\w+;base64,/;
/** MIME type reported when the signature is unknown or cannot be read. */
const FALLBACK_MEDIA_TYPE = 'image/png';
/** Prompt used by analyzeImage when the caller supplies none. */
const DEFAULT_IMAGE_PROMPT = 'What do you see in this screenshot?';
/** Prompt used by analyzeScreenContent when the caller supplies none. */
const DEFAULT_SCREEN_PROMPT = 'Describe what you see in this screenshot in detail.';

/**
 * Detects the image type from base64 data by checking the file signature.
 *
 * Recognises PNG, JPEG, GIF and WebP. Anything else, including input that
 * cannot be decoded, is reported as PNG.
 *
 * @param base64Image - Base64-encoded image data (without a data-URL prefix)
 * @returns The detected MIME type
 */
function detectImageType(base64Image) {
    try {
        // 24 base64 characters decode to 18 bytes, which covers every
        // signature checked below (WebP needs bytes 8..11).
        const signature = Buffer.from(base64Image.substring(0, 24), 'base64');
        // A short buffer yields `undefined` for missing bytes, so the
        // comparison simply fails instead of throwing.
        const hasBytes = (expected, offset = 0) => expected.every((byte, i) => signature[offset + i] === byte);
        // PNG: 89 50 4E 47
        if (hasBytes([0x89, 0x50, 0x4E, 0x47])) {
            return 'image/png';
        }
        // JPEG: FF D8 FF
        if (hasBytes([0xFF, 0xD8, 0xFF])) {
            return 'image/jpeg';
        }
        // GIF: "GIF8" (covers both GIF87a and GIF89a)
        if (hasBytes([0x47, 0x49, 0x46, 0x38])) {
            return 'image/gif';
        }
        // WebP: "RIFF" <4-byte size> "WEBP"
        if (hasBytes([0x52, 0x49, 0x46, 0x46]) && hasBytes([0x57, 0x45, 0x42, 0x50], 8)) {
            return 'image/webp';
        }
        return FALLBACK_MEDIA_TYPE;
    }
    catch (error) {
        console.error('Error detecting image type:', error);
        return FALLBACK_MEDIA_TYPE;
    }
}

/**
 * Builds an Anthropic client from the ANTHROPIC_API_KEY environment variable.
 *
 * @returns A configured Anthropic client
 * @throws {Error} If ANTHROPIC_API_KEY is not set
 */
function createClient() {
    const apiKey = process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
        throw new Error('ANTHROPIC_API_KEY environment variable is not set');
    }
    return new sdk_1.Anthropic({ apiKey });
}

/**
 * Removes a leading data-URL prefix so only the raw base64 payload remains.
 *
 * @param base64Image - Base64 image data, with or without a data-URL prefix
 * @returns The bare base64 payload
 * @throws {TypeError} If the input is not a non-empty string
 */
function stripDataUrlPrefix(base64Image) {
    if (typeof base64Image !== 'string' || base64Image.length === 0) {
        throw new TypeError('Image data must be a non-empty base64 string');
    }
    return base64Image.replace(DATA_URL_PREFIX, '');
}

/**
 * Sends one image plus one text prompt to Claude and returns the reply text.
 *
 * @param client - Anthropic client to send the request with
 * @param request - Request settings
 * @param request.imageData - Bare base64 image payload
 * @param request.mediaType - MIME type declared for the image
 * @param request.prompt - Text prompt accompanying the image
 * @param request.modelName - Claude model name
 * @param request.maxTokens - Value passed as `max_tokens`
 * @returns The text of the first text block in the response
 * @throws {Error} If the response contains no text block
 */
async function requestAnalysis(client, { imageData, mediaType, prompt, modelName, maxTokens }) {
    const response = await client.messages.create({
        model: modelName,
        max_tokens: maxTokens,
        messages: [
            {
                role: 'user',
                // The image block is placed ahead of the text block, the
                // ordering Anthropic's vision guide recommends.
                content: [
                    {
                        type: 'image',
                        source: {
                            type: 'base64',
                            media_type: mediaType,
                            data: imageData,
                        },
                    },
                    { type: 'text', text: prompt },
                ],
            },
        ],
    });
    // Do not assume the first block exists or is text: look it up by type.
    const blocks = Array.isArray(response.content) ? response.content : [];
    const textBlock = blocks.find((block) => block.type === 'text');
    if (!textBlock) {
        throw new Error('Claude response did not contain a text block');
    }
    return textBlock.text;
}

/**
 * Analyzes an image with Claude Vision API
 *
 * @param base64Image - Base64-encoded image data, optionally with a data-URL prefix
 * @param prompt - Prompt to send to Claude
 * @param modelName - Claude model name to use
 * @returns The analysis text
 * @throws Rethrows any error after logging it (missing API key, invalid
 *   image data, request failure, or a response without text)
 */
async function analyzeImage(base64Image, prompt = DEFAULT_IMAGE_PROMPT, modelName = 'claude-3-opus-20240229') {
    try {
        const client = createClient();
        const cleanedBase64 = stripDataUrlPrefix(base64Image);
        // Diagnostics go to stderr. The package is described as an MCP tool,
        // and stdio-based MCP servers reserve stdout for protocol messages.
        // NOTE(review): confirm the transport used by the server entry point.
        console.error('Debug: Base64 image length:', cleanedBase64.length);
        const mediaType = detectImageType(cleanedBase64);
        console.error('Debug: Detected media type:', mediaType);
        // `return await` keeps a rejected request inside this try/catch.
        return await requestAnalysis(client, {
            imageData: cleanedBase64,
            mediaType,
            // Fall back for empty/null prompts as well, not just `undefined`.
            prompt: prompt || DEFAULT_IMAGE_PROMPT,
            modelName,
            maxTokens: 1024,
        });
    }
    catch (error) {
        console.error('Claude Vision API error:', error);
        throw error;
    }
}

/**
 * Captures a screenshot and analyzes it with Claude Vision API
 *
 * @param prompt - Prompt to send to Claude; a default is used when falsy
 * @param modelName - Claude model name to use
 * @param saveScreenshot - When true, the capture is also written to
 *   `<cwd>/screenshots/screenshot_<timestamp>.png` before analysis
 * @returns The analysis text
 * @throws Rethrows any error after logging it (missing API key, capture or
 *   save failure, request failure, or a response without text)
 */
async function analyzeScreenContent(prompt, modelName = 'claude-3-haiku-20240307', saveScreenshot = false) {
    try {
        const client = createClient();
        // Capture screenshot as base64
        const screenshotBase64 = await (0, screenshot_1.captureScreenshot)();
        const cleanedBase64 = stripDataUrlPrefix(screenshotBase64);
        // Diagnostics go to stderr. The package is described as an MCP tool,
        // and stdio-based MCP servers reserve stdout for protocol messages.
        // NOTE(review): confirm the transport used by the server entry point.
        console.error('Debug: Screenshot base64 length:', cleanedBase64.length);
        const mediaType = detectImageType(cleanedBase64);
        console.error('Debug: Detected media type for screenshot:', mediaType);
        if (saveScreenshot) {
            const timestamp = Date.now();
            const screenshotDir = path_1.default.join(process.cwd(), 'screenshots');
            // NOTE(review): the file is always named .png regardless of the
            // detected media type; whether saveScreenshotToFile converts the
            // data is not visible here. Confirm before changing.
            const screenshotPath = path_1.default.join(screenshotDir, `screenshot_${timestamp}.png`);
            // The original, un-stripped capture is handed to the saver.
            await (0, screenshot_1.saveScreenshotToFile)(screenshotBase64, screenshotPath);
            console.error(`Screenshot saved to ${screenshotPath}`);
        }
        // `return await` keeps a rejected request inside this try/catch.
        return await requestAnalysis(client, {
            imageData: cleanedBase64,
            mediaType,
            prompt: prompt || DEFAULT_SCREEN_PROMPT,
            modelName,
            maxTokens: 4096,
        });
    }
    catch (error) {
        console.error('Error analyzing screen content:', error);
        throw error;
    }
}