screen-view-mcp
Version:
MCP tool for capturing screenshots and analyzing them with Claude Vision API
148 lines (147 loc) • 5.71 kB
JavaScript
"use strict";
// Default-import interop shim: reuses an already-installed `this.__importDefault`
// when one exists; otherwise returns modules flagged `__esModule` untouched and
// wraps anything else as `{ default: <value> }`.
var __importDefault = (this && this.__importDefault) || function (moduleExports) {
    if (moduleExports && moduleExports.__esModule) {
        return moduleExports;
    }
    return { "default": moduleExports };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.analyzeImage = analyzeImage;
exports.analyzeScreenContent = analyzeScreenContent;
const path_1 = __importDefault(require("path"));
const sdk_1 = require("@anthropic-ai/sdk");
const screenshot_1 = require("./screenshot");
/**
 * Detects an image's MIME type by checking the file signature ("magic bytes")
 * at the start of its base64-encoded data.
 *
 * Recognizes PNG, JPEG, GIF and WebP. Any other content, and any input that
 * cannot be inspected, is reported as PNG.
 *
 * @param {string} base64Image - Base64-encoded image data (without a `data:` URL prefix)
 * @returns {string} One of 'image/png', 'image/jpeg', 'image/gif' or 'image/webp'
 */
function detectImageType(base64Image) {
    try {
        // 24 base64 characters decode to at most 18 bytes, which covers the
        // longest signature checked below (WebP reads bytes 0-11).
        const signature = Buffer.from(base64Image.substring(0, 24), 'base64');
        // Out-of-range reads yield undefined, so short input simply fails to match.
        const matchesAt = (offset, expected) => expected.every((byte, index) => signature[offset + index] === byte);
        // PNG: 89 50 4E 47
        if (matchesAt(0, [0x89, 0x50, 0x4E, 0x47])) {
            return 'image/png';
        }
        // JPEG: FF D8 FF
        if (matchesAt(0, [0xFF, 0xD8, 0xFF])) {
            return 'image/jpeg';
        }
        // GIF: ASCII "GIF8" (shared prefix of the GIF87a and GIF89a headers)
        if (matchesAt(0, [0x47, 0x49, 0x46, 0x38])) {
            return 'image/gif';
        }
        // WebP: ASCII "RIFF" at offset 0, then "WEBP" at offset 8 after the 4-byte size field
        if (matchesAt(0, [0x52, 0x49, 0x46, 0x46]) && matchesAt(8, [0x57, 0x45, 0x42, 0x50])) {
            return 'image/webp';
        }
        // Unknown signature: fall back to PNG
        return 'image/png';
    }
    catch (error) {
        // Reached when base64Image is not a string (e.g. undefined).
        console.error('Error detecting image type:', error);
        return 'image/png'; // Default to PNG
    }
}
/**
 * Analyzes an image with the Claude Vision API.
 *
 * @param {string} base64Image - Base64-encoded image data; a leading
 *   `data:image/<type>;base64,` prefix is stripped before sending
 * @param {string} [prompt] - Prompt to send to Claude alongside the image
 * @param {string} [modelName] - Claude model name to use
 * @returns {Promise<string>} The text of Claude's reply (all text blocks concatenated)
 * @throws {Error} If ANTHROPIC_API_KEY is not set, or the reply contains no text block
 * @throws {TypeError} If base64Image is not a non-empty string
 */
async function analyzeImage(base64Image, prompt = 'What do you see in this screenshot?', modelName = 'claude-3-opus-20240229') {
    try {
        const apiKey = process.env.ANTHROPIC_API_KEY;
        if (!apiKey) {
            throw new Error('ANTHROPIC_API_KEY environment variable is not set');
        }
        // Fail early with a clear message instead of an obscure error from `.replace`
        // or a rejected API request.
        if (typeof base64Image !== 'string' || base64Image.length === 0) {
            throw new TypeError('base64Image must be a non-empty base64 string');
        }
        // Initialize the Anthropic client
        const client = new sdk_1.Anthropic({
            apiKey,
        });
        // Accept both raw base64 and data-URL input
        const cleanedBase64 = base64Image.replace(/^data:image\/\w+;base64,/, '');
        // Diagnostics go to stderr so they cannot interleave with protocol
        // output on stdout if this MCP tool is served over stdio.
        console.error('Debug: Base64 image length:', cleanedBase64.length);
        // Detect image type
        const mediaType = detectImageType(cleanedBase64);
        console.error('Debug: Detected media type:', mediaType);
        // Send to Claude for analysis
        const response = await client.messages.create({
            model: modelName,
            max_tokens: 1024,
            messages: [
                {
                    role: 'user',
                    content: [
                        { type: 'text', text: prompt },
                        {
                            type: 'image',
                            source: {
                                type: 'base64',
                                media_type: mediaType,
                                data: cleanedBase64,
                            },
                        },
                    ],
                },
            ],
        });
        // The reply is a list of content blocks; do not assume the first one is text.
        const contentBlocks = Array.isArray(response.content) ? response.content : [];
        const textBlocks = contentBlocks.filter((block) => block && block.type === 'text');
        if (textBlocks.length === 0) {
            throw new Error('Claude response did not contain any text content');
        }
        return textBlocks.map((block) => block.text).join('');
    }
    catch (error) {
        console.error('Claude Vision API error:', error);
        throw error;
    }
}
/**
 * Captures a screenshot and analyzes it with the Claude Vision API.
 *
 * @param {string} [prompt] - Prompt to send with the screenshot; when falsy
 *   (including an empty string) a generic "describe in detail" prompt is used
 * @param {string} [modelName] - Claude model name to use
 * @param {boolean} [saveScreenshot] - When true, also writes the screenshot to
 *   `<cwd>/screenshots/screenshot_<timestamp>.<ext>` before analysis
 * @returns {Promise<string>} The text of Claude's reply (all text blocks concatenated)
 * @throws {Error} If ANTHROPIC_API_KEY is not set, or the reply contains no text block
 * @throws {TypeError} If the captured screenshot is not a base64 string
 */
async function analyzeScreenContent(prompt, modelName = 'claude-3-haiku-20240307', saveScreenshot = false) {
    try {
        const apiKey = process.env.ANTHROPIC_API_KEY;
        if (!apiKey) {
            throw new Error('ANTHROPIC_API_KEY environment variable is not set');
        }
        const client = new sdk_1.Anthropic({
            apiKey,
        });
        // Capture screenshot as base64
        const screenshotBase64 = await (0, screenshot_1.captureScreenshot)();
        if (typeof screenshotBase64 !== 'string') {
            throw new TypeError('captureScreenshot did not return a base64 string');
        }
        // Clean base64 data (strip a data-URL prefix if present)
        const cleanedBase64 = screenshotBase64.replace(/^data:image\/\w+;base64,/, '');
        // Diagnostics go to stderr so they cannot interleave with protocol
        // output on stdout if this MCP tool is served over stdio.
        console.error('Debug: Screenshot base64 length:', cleanedBase64.length);
        // Detect image type
        const mediaType = detectImageType(cleanedBase64);
        console.error('Debug: Detected media type for screenshot:', mediaType);
        // Save screenshot if requested
        if (saveScreenshot) {
            const timestamp = Date.now();
            // Name the file after the detected format rather than always ".png"
            // (e.g. 'image/jpeg' -> 'jpeg').
            const extension = mediaType.split('/')[1] || 'png';
            const screenshotDir = path_1.default.join(process.cwd(), 'screenshots');
            const screenshotPath = path_1.default.join(screenshotDir, `screenshot_${timestamp}.${extension}`);
            await (0, screenshot_1.saveScreenshotToFile)(screenshotBase64, screenshotPath);
            console.error(`Screenshot saved to ${screenshotPath}`);
        }
        // Analyze the screenshot with Claude
        const response = await client.messages.create({
            model: modelName,
            max_tokens: 4096,
            messages: [
                {
                    role: 'user',
                    content: [
                        {
                            type: 'image',
                            source: {
                                type: 'base64',
                                media_type: mediaType,
                                data: cleanedBase64,
                            },
                        },
                        {
                            type: 'text',
                            text: prompt || 'Describe what you see in this screenshot in detail.',
                        },
                    ],
                },
            ],
        });
        // The reply is a list of content blocks; do not assume the first one is text.
        const contentBlocks = Array.isArray(response.content) ? response.content : [];
        const textBlocks = contentBlocks.filter((block) => block && block.type === 'text');
        if (textBlocks.length === 0) {
            throw new Error('Claude response did not contain any text content');
        }
        return textBlocks.map((block) => block.text).join('');
    }
    catch (error) {
        console.error('Error analyzing screen content:', error);
        throw error;
    }
}