UNPKG

chrometools-mcp

Version:

MCP (Model Context Protocol) server for Chrome automation using Puppeteer. Persistent browser sessions, visual testing, Figma comparison, and design validation. Works seamlessly in WSL, Linux, and macOS.

github.com/docentovich/chrometools-mcp

docentovich/chrometools-mcp

1,231 lines (1,104 loc) • 101 kB

JavaScript

#!/usr/bin/env node import { Server } from "@modelcontextprotocol/sdk/server/index.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { CallToolRequestSchema, ListToolsRequestSchema, } from "@modelcontextprotocol/sdk/types.js"; import { z } from "zod"; import puppeteer from "puppeteer"; import Jimp from "jimp"; import pixelmatch from "pixelmatch"; import { writeFileSync, mkdirSync, readFileSync } from 'fs'; import { dirname } from 'path'; import { spawn } from 'child_process'; import http from 'http'; import { fileURLToPath } from 'url'; import path from 'path'; // Figma token from environment variable (can be set in MCP config) const FIGMA_TOKEN = process.env.FIGMA_TOKEN || null; // Get current directory for loading utils const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); // Load element finder utilities const elementFinderUtils = readFileSync(path.join(__dirname, 'element-finder-utils.js'), 'utf-8'); // Import hints generator import { generateNavigationHints, generateClickHints, generateFormSubmitHints, generatePageHints } from './utils/hints-generator.js'; // Import Recorder modules import { injectRecorder } from './recorder/recorder-script.js'; import { executeScenario } from './recorder/scenario-executor.js'; import { initializeStorage, saveScenario, loadScenario, listScenarios, searchScenarios, deleteScenario } from './recorder/scenario-storage.js'; // Detect WSL environment const isWSL = (() => { try { const fs = require('fs'); const proc_version = fs.readFileSync('/proc/version', 'utf8').toLowerCase(); return proc_version.includes('microsoft') || proc_version.includes('wsl'); } catch { return false; } })(); // Detect Windows environment (including WSL) const isWindows = process.platform === 'win32' || isWSL; // Get Chrome executable path based on platform function getChromePath() { if (process.platform === 'win32') { // Native Windows return 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'; } else if (isWSL) { // WSL - use Windows Chrome return '/mnt/c/Program Files/Google/Chrome/Application/chrome.exe'; } else { // Linux return '/usr/bin/google-chrome'; } } // Get temp directory based on platform function getTempDir() { if (process.platform === 'win32') { return process.env.TEMP || 'C:\\Windows\\Temp'; } else if (isWSL) { return '/mnt/c/Windows/Temp'; } else { return process.env.TMPDIR || '/tmp'; } } // Global browser instance (persists between requests) let browserPromise = null; const openPages = new Map(); let lastPage = null; let chromeProcess = null; // Console logs storage const consoleLogs = []; // Network requests storage const networkRequests = []; // Page analysis cache (method 4) const pageAnalysisCache = new Map(); // Track pages with recorder injected const pagesWithRecorder = new WeakSet(); // Debug port for Chrome remote debugging const CHROME_DEBUG_PORT = 9222; // Helper function to get WebSocket endpoint from Chrome async function getChromeWebSocketEndpoint(port = CHROME_DEBUG_PORT, maxRetries = 10) { for (let i = 0; i < maxRetries; i++) { try { const response = await new Promise((resolve, reject) => { const req = http.get(`http://localhost:${port}/json/version`, (res) => { let data = ''; res.on('data', chunk => data += chunk); res.on('end', () => resolve(data)); }); req.on('error', reject); req.setTimeout(1000); }); const info = JSON.parse(response); if (info.webSocketDebuggerUrl) { return info.webSocketDebuggerUrl; } } catch (err) { // Chrome might not be ready yet, wait and retry await new Promise(resolve => setTimeout(resolve, 500)); } } throw new Error('Could not get Chrome WebSocket endpoint after multiple retries'); } // Initialize browser (singleton) async function getBrowser() { if (!browserPromise) { browserPromise = (async () => { try { let browser; let endpoint; // Try to connect to existing Chrome with remote debugging try { endpoint = await getChromeWebSocketEndpoint(CHROME_DEBUG_PORT, 2); browser = await puppeteer.connect({ browserWSEndpoint: endpoint, defaultViewport: null, }); console.error("[chrometools-mcp] Connected to existing Chrome instance"); console.error("[chrometools-mcp] WebSocket endpoint:", endpoint); return browser; } catch (connectError) { console.error("[chrometools-mcp] No existing Chrome found, launching new instance..."); } // Launch new Chrome with remote debugging enabled const chromePath = getChromePath(); const userDataDir = `${getTempDir()}/chrome-mcp-profile`; console.error("[chrometools-mcp] Chrome path:", chromePath); console.error("[chrometools-mcp] User data dir:", userDataDir); chromeProcess = spawn(chromePath, [ `--remote-debugging-port=${CHROME_DEBUG_PORT}`, '--no-first-run', '--no-default-browser-check', `--user-data-dir=${userDataDir}`, ], { detached: true, stdio: 'ignore', }); chromeProcess.unref(); // Allow Node to exit even if Chrome is running console.error("[chrometools-mcp] Chrome launched with remote debugging on port", CHROME_DEBUG_PORT); // Wait for Chrome to start and get the endpoint endpoint = await getChromeWebSocketEndpoint(CHROME_DEBUG_PORT, 20); // Connect to the Chrome instance browser = await puppeteer.connect({ browserWSEndpoint: endpoint, defaultViewport: null, }); console.error("[chrometools-mcp] Connected to Chrome instance"); console.error("[chrometools-mcp] WebSocket endpoint:", endpoint); return browser; } catch (error) { // Check if it's a display-related error in WSL if (isWSL && ( error.message.includes('DISPLAY') || error.message.includes('connect ECONNREFUSED') || error.message.includes('cannot open display') )) { const helpMessage = ` ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ ❌ WSL X Server Error Detected You are running in WSL environment with headless:false mode. This requires an X server to display the browser GUI. 🔧 Solution: 1. Start X server on Windows (e.g., VcXsrv, X410) 2. Set DISPLAY in your MCP config: { "mcpServers": { "chrometools": { "env": { "DISPLAY": "172.25.96.1:0" } } } } 📚 For detailed setup instructions, see: WSL_SETUP.md in chrometools-mcp package 💡 Alternative: Run in headless mode (modify index.js) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ `; console.error(helpMessage); throw new Error(`WSL X Server not available. ${error.message}\n\nSee above for setup instructions.`); } // Re-throw other errors as-is throw error; } })(); } return browserPromise; } // Setup navigation listener for recorder auto-reinjection // Track pages with network monitoring to prevent duplicate setup const pagesWithNetworkMonitoring = new WeakSet(); // Setup network monitoring with auto-reinitialization on navigation async function setupNetworkMonitoring(page) { // Prevent duplicate setup on the same page if (pagesWithNetworkMonitoring.has(page)) { return; } pagesWithNetworkMonitoring.add(page); const client = await page.target().createCDPSession(); await client.send('Network.enable'); client.on('Network.requestWillBeSent', (event) => { const timestamp = new Date().toISOString(); networkRequests.push({ requestId: event.requestId, url: event.request.url, method: event.request.method, headers: event.request.headers, postData: event.request.postData, timestamp, type: event.type, // Document, Stylesheet, Image, Media, Font, Script, XHR, Fetch, etc. initiator: event.initiator.type, // parser, script, other status: 'pending', documentURL: event.documentURL }); }); client.on('Network.responseReceived', (event) => { const req = networkRequests.find(r => r.requestId === event.requestId); if (req) { req.status = event.response.status; req.statusText = event.response.statusText; req.responseHeaders = event.response.headers; req.mimeType = event.response.mimeType; req.fromCache = event.response.fromDiskCache || event.response.fromServiceWorker; req.timing = event.response.timing; } }); client.on('Network.loadingFinished', (event) => { const req = networkRequests.find(r => r.requestId === event.requestId); if (req && req.status === 'pending') { req.status = 'completed'; } if (req) { req.encodedDataLength = event.encodedDataLength; req.finishedTimestamp = new Date().toISOString(); } }); client.on('Network.loadingFailed', (event) => { const req = networkRequests.find(r => r.requestId === event.requestId); if (req) { req.status = 'failed'; req.errorText = event.errorText; req.canceled = event.canceled; req.finishedTimestamp = new Date().toISOString(); } }); // Auto-reinitialize on navigation (CDP session is reset on navigation) let lastUrl = page.url(); page.on('framenavigated', async (frame) => { // Only handle main frame navigation if (frame !== page.mainFrame()) return; const currentUrl = frame.url(); // Skip if URL hasn't changed if (currentUrl === lastUrl) return; lastUrl = currentUrl; // Remove from tracking set to allow re-setup pagesWithNetworkMonitoring.delete(page); // Small delay to let navigation settle setTimeout(async () => { try { await setupNetworkMonitoring(page); } catch (error) { console.error('[chrometools-mcp] Failed to reinitialize network monitoring:', error.message); } }, 100); }); } async function setupRecorderAutoReinjection(page) { let reinjectionTimeout = null; let lastUrl = null; // Handle navigation events (form submits, link clicks, history API) page.on('framenavigated', async (frame) => { // Only handle main frame navigation if (frame !== page.mainFrame()) return; // Get current URL const currentUrl = frame.url(); // Skip if URL hasn't changed (prevents duplicate injections on same page) if (currentUrl === lastUrl) { return; } lastUrl = currentUrl; // Clear any pending reinjection if (reinjectionTimeout) { clearTimeout(reinjectionTimeout); } // Debounce reinjection (wait 100ms for navigation to settle) reinjectionTimeout = setTimeout(async () => { // Check if this page had recorder before if (pagesWithRecorder.has(page)) { try { await injectRecorder(page); } catch (error) { console.error('[chrometools-mcp] Failed to re-inject recorder:', error.message); } } }, 100); }); // Handle page reloads (F5, Ctrl+R) - use 'load' event page.on('load', async () => { // Check if this page had recorder before if (pagesWithRecorder.has(page)) { try { await injectRecorder(page); } catch (error) { console.error('[chrometools-mcp] Failed to re-inject recorder after reload:', error.message); } } }); } // Get or create page for URL async function getOrCreatePage(url) { const browser = await getBrowser(); // Check if page for this URL already exists if (openPages.has(url)) { const existingPage = openPages.get(url); if (!existingPage.isClosed()) { lastPage = existingPage; return existingPage; } openPages.delete(url); } // Create new page const page = await browser.newPage(); // Set up console log capture const client = await page.target().createCDPSession(); await client.send('Runtime.enable'); await client.send('Log.enable'); client.on('Runtime.consoleAPICalled', (event) => { const timestamp = new Date().toISOString(); const args = event.args.map(arg => { if (arg.value !== undefined) return arg.value; if (arg.description) return arg.description; return String(arg); }); consoleLogs.push({ type: event.type, // log, warn, error, info, debug timestamp, message: args.join(' '), stackTrace: event.stackTrace }); }); client.on('Log.entryAdded', (event) => { const entry = event.entry; consoleLogs.push({ type: entry.level, // verbose, info, warning, error timestamp: new Date(entry.timestamp).toISOString(), message: entry.text, source: entry.source, url: entry.url, lineNumber: entry.lineNumber }); }); // Setup network monitoring with auto-reinitialization on navigation await setupNetworkMonitoring(page); // Setup recorder auto-reinjection on navigation setupRecorderAutoReinjection(page); await page.goto(url, { waitUntil: 'networkidle2' }); openPages.set(url, page); lastPage = page; return page; } // Get last opened page (for tools that don't need URL) async function getLastOpenPage() { if (!lastPage || lastPage.isClosed()) { throw new Error('No page is currently open. Use openBrowser first to open a page.'); } // Setup recorder auto-reinjection if not already set up // Check if page already has navigation listener const listenerCount = lastPage.listenerCount('framenavigated'); if (listenerCount === 0) { setupRecorderAutoReinjection(lastPage); } return lastPage; } // Helper function to normalize Figma node ID (convert URL format to API format) function normalizeFigmaNodeId(nodeId) { // Figma URLs use format like "47361-19211" but API expects "47361:19211" // This function automatically converts between formats return nodeId.replace(/-/g, ':'); } // Figma API helper function async function fetchFigmaAPI(endpoint, figmaToken) { if (!figmaToken) { throw new Error('Figma token is required. Get it from https://www.figma.com/developers/api#access-tokens'); } const response = await fetch(`https://api.figma.com/v1/${endpoint}`, { headers: { 'X-Figma-Token': figmaToken } }); if (!response.ok) { const error = await response.text(); throw new Error(`Figma API error: ${response.status} - ${error}`); } return response.json(); } // Helper function to process screenshot with compression and scaling async function processScreenshot(screenshotBuffer, options = {}) { const { maxWidth = 1024, maxHeight = 8000, // API limit is 8000px quality = 80, format = 'auto' } = options; // Load image with Jimp const image = await Jimp.read(screenshotBuffer); const originalWidth = image.bitmap.width; const originalHeight = image.bitmap.height; const originalSize = screenshotBuffer.length; let processed = false; // Apply scaling if needed to fit within maxWidth and maxHeight if (maxWidth !== null || maxHeight !== null) { let newWidth = originalWidth; let newHeight = originalHeight; // Calculate scale factors for both dimensions let scaleWidth = 1.0; let scaleHeight = 1.0; if (maxWidth !== null && originalWidth > maxWidth) { scaleWidth = maxWidth / originalWidth; } if (maxHeight !== null && originalHeight > maxHeight) { scaleHeight = maxHeight / originalHeight; } // Use the smaller scale factor to ensure both dimensions fit const scale = Math.min(scaleWidth, scaleHeight); if (scale < 1.0) { newWidth = Math.round(originalWidth * scale); newHeight = Math.round(originalHeight * scale); image.resize(newWidth, newHeight); processed = true; } } // Determine output format let outputFormat = format; let mimeType = 'image/png'; if (format === 'auto') { // Auto-select: use JPEG for large images, PNG for small const estimatedSize = image.bitmap.width * image.bitmap.height * 4; outputFormat = estimatedSize > 500000 ? 'jpeg' : 'png'; // ~500KB threshold } // Convert to buffer with appropriate format and quality let resultBuffer; if (outputFormat === 'jpeg') { image.quality(quality); resultBuffer = await image.getBufferAsync(Jimp.MIME_JPEG); mimeType = 'image/jpeg'; processed = true; } else { resultBuffer = await image.getBufferAsync(Jimp.MIME_PNG); mimeType = 'image/png'; } // Return original if no processing was needed and format is PNG if (!processed && outputFormat === 'png') { return { buffer: screenshotBuffer, mimeType: 'image/png', metadata: { width: originalWidth, height: originalHeight, originalSize, finalSize: screenshotBuffer.length, format: 'png', compressed: false, scaled: false } }; } return { buffer: resultBuffer, mimeType, metadata: { width: image.bitmap.width, height: image.bitmap.height, originalWidth, originalHeight, originalSize, finalSize: resultBuffer.length, format: outputFormat, compressed: outputFormat === 'jpeg', scaled: processed, compressionRatio: Math.round((1 - resultBuffer.length / originalSize) * 100) } }; } // Calculate SSIM (Structural Similarity Index) for image comparison function calculateSSIM(img1Data, img2Data, width, height) { if (img1Data.length !== img2Data.length) { return 0; } const windowSize = 8; const k1 = 0.01; const k2 = 0.03; const c1 = (k1 * 255) ** 2; const c2 = (k2 * 255) ** 2; let ssimSum = 0; let validWindows = 0; for (let y = 0; y <= height - windowSize; y += windowSize) { for (let x = 0; x <= width - windowSize; x += windowSize) { let sum1 = 0, sum2 = 0, sum1Sq = 0, sum2Sq = 0, sum12 = 0; for (let dy = 0; dy < windowSize; dy++) { for (let dx = 0; dx < windowSize; dx++) { const idx = ((y + dy) * width + (x + dx)) * 4; if (idx + 2 >= img1Data.length) continue; const gray1 = (img1Data[idx] * 0.299 + img1Data[idx + 1] * 0.587 + img1Data[idx + 2] * 0.114); const gray2 = (img2Data[idx] * 0.299 + img2Data[idx + 1] * 0.587 + img2Data[idx + 2] * 0.114); sum1 += gray1; sum2 += gray2; sum1Sq += gray1 * gray1; sum2Sq += gray2 * gray2; sum12 += gray1 * gray2; } } const n = windowSize * windowSize; const mean1 = sum1 / n; const mean2 = sum2 / n; const variance1 = (sum1Sq / n) - (mean1 * mean1); const variance2 = (sum2Sq / n) - (mean2 * mean2); const covariance = (sum12 / n) - (mean1 * mean2); const ssim = ((2 * mean1 * mean2 + c1) * (2 * covariance + c2)) / ((mean1 * mean1 + mean2 * mean2 + c1) * (variance1 + variance2 + c2)); ssimSum += ssim; validWindows++; } } return validWindows > 0 ? ssimSum / validWindows : 0; } // Cleanup on exit process.on("SIGINT", async () => { if (browserPromise) { const browser = await browserPromise; await browser.close(); } process.exit(0); }); // Create MCP server const server = new Server( { name: "chrometools-mcp", version: "1.0.2", }, { capabilities: { tools: {}, }, } ); // Tool schemas const PingSchema = z.object({ message: z.string().optional().describe("Optional message to send"), }); const OpenBrowserSchema = z.object({ url: z.string().describe("URL to open in the browser"), }); const ClickSchema = z.object({ selector: z.string().describe("CSS selector for element to click"), waitAfter: z.number().optional().describe("Milliseconds to wait after click (default: 1500)"), screenshot: z.boolean().optional().describe("Capture screenshot after click (default: false for performance)"), timeout: z.number().optional().describe("Maximum time to wait for operation in ms (default: 30000)"), }); const TypeSchema = z.object({ selector: z.string().describe("CSS selector for input element"), text: z.string().describe("Text to type"), delay: z.number().optional().describe("Delay between keystrokes in ms (default: 0)"), clearFirst: z.boolean().optional().describe("Clear field before typing (default: true)"), }); const GetElementSchema = z.object({ selector: z.string().optional().describe("CSS selector (optional, defaults to body)"), }); const GetComputedCssSchema = z.object({ selector: z.string().optional().describe("CSS selector (optional, defaults to body)"), }); const GetBoxModelSchema = z.object({ selector: z.string().describe("CSS selector for element"), }); const ScreenshotSchema = z.object({ selector: z.string().describe("CSS selector for element to screenshot"), padding: z.number().optional().describe("Padding around element in pixels (default: 0)"), maxWidth: z.number().nullable().optional().describe("Maximum width in pixels, auto-scales if larger (default: 1024, set to null for original size)"), maxHeight: z.number().nullable().optional().describe("Maximum height in pixels, auto-scales if larger (default: 8000 for API limit, set to null for original size)"), quality: z.number().min(1).max(100).optional().describe("JPEG quality 1-100 (default: 80, only applies to JPEG format)"), format: z.enum(['png', 'jpeg', 'auto']).optional().describe("Image format: 'png', 'jpeg', or 'auto' (default: 'auto' - chooses based on size)"), }); const SaveScreenshotSchema = z.object({ selector: z.string().describe("CSS selector for element to screenshot"), filePath: z.string().describe("Absolute path where to save file"), padding: z.number().optional().describe("Padding around element in pixels (default: 0)"), maxWidth: z.number().nullable().optional().describe("Maximum width in pixels, auto-scales if larger (default: 1024, set to null for original size)"), maxHeight: z.number().nullable().optional().describe("Maximum height in pixels, auto-scales if larger (default: 8000 for API limit, set to null for original size)"), quality: z.number().min(1).max(100).optional().describe("JPEG quality 1-100 (default: 80, only applies to JPEG format)"), format: z.enum(['png', 'jpeg', 'auto']).optional().describe("Image format: 'png', 'jpeg', or 'auto' (default: 'auto' - chooses based on size)"), }); const ScrollToSchema = z.object({ selector: z.string().describe("CSS selector for element to scroll to"), behavior: z.enum(['auto', 'smooth']).optional().describe("Scroll behavior (default: auto)"), }); const ExecuteScriptSchema = z.object({ script: z.string().describe("JavaScript code to execute in page context"), waitAfter: z.number().optional().describe("Milliseconds to wait after execution (default: 500)"), screenshot: z.boolean().optional().describe("Capture screenshot after execution (default: false for performance)"), timeout: z.number().optional().describe("Maximum time to wait for operation in ms (default: 30000)"), }); // Phase 2 schemas const GetConsoleLogsSchema = z.object({ types: z.array(z.enum(['log', 'warn', 'error', 'info', 'debug', 'verbose', 'warning'])) .optional() .describe("Filter by log types (default: all)"), clear: z.boolean().optional().describe("Clear logs after reading (default: false)"), }); const GetNetworkRequestsSchema = z.object({ types: z.array(z.enum(['Document', 'Stylesheet', 'Image', 'Media', 'Font', 'Script', 'XHR', 'Fetch', 'WebSocket', 'Other'])) .optional() .describe("Filter by request types (default: all)"), status: z.enum(['pending', 'completed', 'failed', 'all']) .optional() .describe("Filter by status (default: all)"), urlPattern: z.string().optional().describe("Filter by URL pattern (regex)"), clear: z.boolean().optional().describe("Clear requests after reading (default: false)"), }); const HoverSchema = z.object({ selector: z.string().describe("CSS selector for element to hover"), }); const SetStylesSchema = z.object({ selector: z.string().describe("CSS selector for element to modify"), styles: z.array(z.object({ name: z.string().describe("CSS property name (e.g., 'color')"), value: z.string().describe("CSS property value (e.g., 'red')") })).describe("Array of CSS property name-value pairs"), }); const SetViewportSchema = z.object({ width: z.number().min(320).max(4000).describe("Viewport width in pixels (320-4000)"), height: z.number().min(200).max(3000).describe("Viewport height in pixels (200-3000)"), deviceScaleFactor: z.number().min(0.5).max(3).optional().describe("Device pixel ratio (0.5-3, default: 1)"), }); const GetViewportSchema = z.object({}); const NavigateToSchema = z.object({ url: z.string().describe("URL to navigate to"), waitUntil: z.enum(['load', 'domcontentloaded', 'networkidle0', 'networkidle2']) .optional() .describe("Wait until event (default: networkidle2)"), }); // Figma tools schemas const GetFigmaFrameSchema = z.object({ figmaToken: z.string().optional().describe("Figma API token (optional if FIGMA_TOKEN env var is set)"), fileKey: z.string().describe("Figma file key (from URL: figma.com/file/FILE_KEY/...)"), nodeId: z.string().describe("Figma node ID (frame/component ID)"), scale: z.number().min(0.1).max(4).optional().describe("Export scale (0.1-4, default: 2)"), format: z.enum(['png', 'jpg', 'svg']).optional().describe("Export format (default: png)") }); const CompareFigmaToElementSchema = z.object({ figmaToken: z.string().optional().describe("Figma API token (optional if FIGMA_TOKEN env var is set)"), fileKey: z.string().describe("Figma file key"), nodeId: z.string().describe("Figma frame/component ID"), selector: z.string().describe("CSS selector for page element"), threshold: z.number().min(0).max(1).optional().describe("Difference threshold (0-1, default: 0.05)"), figmaScale: z.number().min(0.1).max(4).optional().describe("Figma export scale (default: 2)") }); const GetFigmaSpecsSchema = z.object({ figmaToken: z.string().optional().describe("Figma API token (optional if FIGMA_TOKEN env var is set)"), fileKey: z.string().describe("Figma file key"), nodeId: z.string().describe("Figma frame/component ID") }); // New AI optimization tools schemas const SmartFindElementSchema = z.object({ description: z.string().describe("Natural language description of element to find (e.g., 'login button', 'email field')"), maxResults: z.number().min(1).max(20).optional().describe("Maximum number of candidates to return (default: 5)"), action: z.object({ type: z.enum(['click', 'type', 'scrollTo', 'screenshot', 'hover', 'setStyles']).describe("Action to perform on the best match"), text: z.string().optional().describe("Text to type (required for 'type' action)"), styles: z.array(z.object({ name: z.string(), value: z.string() })).optional().describe("Styles to apply (required for 'setStyles' action)"), screenshot: z.boolean().optional().describe("Capture screenshot after action (default: false)"), waitAfter: z.number().optional().describe("Wait time in ms after action"), }).optional().describe("Optional action to perform on the best matching element"), }); const AnalyzePageSchema = z.object({ refresh: z.boolean().optional().describe("Force refresh of cached analysis (default: false)"), }); const GetAllInteractiveElementsSchema = z.object({ includeHidden: z.boolean().optional().describe("Include hidden elements (default: false)"), }); const FindElementsByTextSchema = z.object({ text: z.string().describe("Text to search for in elements"), exact: z.boolean().optional().describe("Exact match only (default: false)"), caseSensitive: z.boolean().optional().describe("Case sensitive search (default: false)"), action: z.object({ type: z.enum(['click', 'type', 'scrollTo', 'screenshot', 'hover', 'setStyles']).describe("Action to perform on the first match"), text: z.string().optional().describe("Text to type (required for 'type' action)"), styles: z.array(z.object({ name: z.string(), value: z.string() })).optional().describe("Styles to apply (required for 'setStyles' action)"), screenshot: z.boolean().optional().describe("Capture screenshot after action (default: false)"), waitAfter: z.number().optional().describe("Wait time in ms after action"), }).optional().describe("Optional action to perform on the first matching element"), }); // List available tools server.setRequestHandler(ListToolsRequestSchema, async () => { return { tools: [ { name: "ping", description: "Simple ping-pong tool for testing. Returns 'pong' with optional message.", inputSchema: { type: "object", properties: { message: { type: "string", description: "Optional message to include in response" }, }, }, }, { name: "openBrowser", description: "Opens a browser window and navigates to the specified URL. Browser window remains open for further interactions. Use this as the first step before other tools.", inputSchema: { type: "object", properties: { url: { type: "string", description: "URL to navigate to (e.g., https://example.com)" }, }, required: ["url"], }, }, { name: "click", description: "Click on an element to trigger interactions like opening modals, navigating, or submitting forms. Waits for animations. Screenshot is optional for better performance.", inputSchema: { type: "object", properties: { selector: { type: "string", description: "CSS selector for element to click" }, waitAfter: { type: "number", description: "Milliseconds to wait after click (default: 1500)" }, screenshot: { type: "boolean", description: "Capture screenshot after click (default: false for performance)" }, timeout: { type: "number", description: "Maximum time to wait for operation in ms (default: 30000)" }, }, required: ["selector"], }, }, { name: "type", description: "Type text into an input field, textarea, or contenteditable element. Can optionally clear the field first and control typing speed for realistic input simulation.", inputSchema: { type: "object", properties: { selector: { type: "string", description: "CSS selector for input element" }, text: { type: "string", description: "Text to type" }, delay: { type: "number", description: "Delay between keystrokes in ms (default: 0)" }, clearFirst: { type: "boolean", description: "Clear field before typing (default: true)" }, }, required: ["selector", "text"], }, }, { name: "getElement", description: "Get the HTML markup of an element for inspection and debugging. If no selector is provided, returns the entire <body> element. Useful for understanding component structure.", inputSchema: { type: "object", properties: { selector: { type: "string", description: "CSS selector (optional, defaults to body)" }, }, }, }, { name: "getComputedCss", description: "Get all computed CSS styles applied to an element. Essential for debugging layout issues, checking responsive design, and verifying CSS properties. Returns complete computed styles.", inputSchema: { type: "object", properties: { selector: { type: "string", description: "CSS selector (optional, defaults to body)" }, }, }, }, { name: "getBoxModel", description: "Get precise element dimensions, positioning, margins, padding, and borders. Returns complete box model data including content, padding, border, and margin dimensions.", inputSchema: { type: "object", properties: { selector: { type: "string", description: "CSS selector for element" }, }, required: ["selector"], }, }, { name: "screenshot", description: "Capture an optimized screenshot of a specific element. By default, auto-scales large images to 1024px width and 8000px height (API limit) and uses smart compression to reduce AI context usage. Perfect for visual documentation and design reviews. Use maxWidth: null and format: 'png' for original quality.", inputSchema: { type: "object", properties: { selector: { type: "string", description: "CSS selector for element to screenshot" }, padding: { type: "number", description: "Padding around element in pixels (default: 0)" }, maxWidth: { type: "number", description: "Maximum width in pixels, auto-scales if larger (default: 1024, set to null for original size)" }, maxHeight: { type: "number", description: "Maximum height in pixels, auto-scales if larger (default: 8000 for API limit, set to null for original size)" }, quality: { type: "number", minimum: 1, maximum: 100, description: "JPEG quality 1-100 (default: 80, only applies to JPEG format)" }, format: { type: "string", enum: ["png", "jpeg", "auto"], description: "Image format: 'png', 'jpeg', or 'auto' (default: 'auto' - chooses based on size)" }, }, required: ["selector"], }, }, { name: "saveScreenshot", description: "Save optimized screenshot directly to filesystem without returning in context. By default, auto-scales to 1024px width and 8000px height (API limit) and uses smart compression. Perfect for baseline screenshots and reducing file sizes. Use maxWidth: null and format: 'png' for original quality.", inputSchema: { type: "object", properties: { selector: { type: "string", description: "CSS selector for element to screenshot" }, filePath: { type: "string", description: "Absolute path where to save file (extension auto-adjusted based on format)" }, padding: { type: "number", description: "Padding around element in pixels (default: 0)" }, maxWidth: { type: "number", description: "Maximum width in pixels, auto-scales if larger (default: 1024, set to null for original size)" }, maxHeight: { type: "number", description: "Maximum height in pixels, auto-scales if larger (default: 8000 for API limit, set to null for original size)" }, quality: { type: "number", minimum: 1, maximum: 100, description: "JPEG quality 1-100 (default: 80, only applies to JPEG format)" }, format: { type: "string", enum: ["png", "jpeg", "auto"], description: "Image format: 'png', 'jpeg', or 'auto' (default: 'auto' - chooses based on size)" }, }, required: ["selector", "filePath"], }, }, { name: "scrollTo", description: "Scroll the page to bring an element into view. Useful for testing lazy loading, sticky elements, and ensuring elements are visible. Supports smooth or instant scrolling.", inputSchema: { type: "object", properties: { selector: { type: "string", description: "CSS selector for element to scroll to" }, behavior: { type: "string", enum: ["auto", "smooth"], description: "Scroll behavior (default: auto)" }, }, required: ["selector"], }, }, { name: "executeScript", description: "Execute arbitrary JavaScript code in the page context. Perfect for complex interactions, setting values, triggering events, or any custom page manipulation. Returns execution result. Screenshot is optional for better performance.", inputSchema: { type: "object", properties: { script: { type: "string", description: "JavaScript code to execute" }, waitAfter: { type: "number", description: "Milliseconds to wait after execution (default: 500)" }, screenshot: { type: "boolean", description: "Capture screenshot after execution (default: false for performance)" }, timeout: { type: "number", description: "Maximum time to wait for operation in ms (default: 30000)" }, }, required: ["script"], }, }, { name: "getConsoleLogs", description: "Retrieve all console.log, console.warn, console.error messages from the browser. Essential for debugging JavaScript errors and tracking application behavior. Logs are captured automatically from page load.", inputSchema: { type: "object", properties: { types: { type: "array", items: { type: "string", enum: ["log", "warn", "error", "info", "debug", "verbose", "warning"] }, description: "Filter by log types (default: all)" }, clear: { type: "boolean", description: "Clear logs after reading (default: false)" }, }, }, }, { name: "getNetworkRequests", description: "Retrieve all network requests (XHR, Fetch, API calls, resources) made by the browser. Essential for debugging API calls, monitoring backend requests, and tracking resource loading. Requests are captured automatically from page load.", inputSchema: { type: "object", properties: { types: { type: "array", items: { type: "string", enum: ["Document", "Stylesheet", "Image", "Media", "Font", "Script", "XHR", "Fetch", "WebSocket", "Other"] }, description: "Filter by request types (default: all)" }, status: { type: "string", enum: ["pending", "completed", "failed", "all"], description: "Filter by status (default: all)" }, urlPattern: { type: "string", description: "Filter by URL pattern (regex)" }, clear: { type: "boolean", description: "Clear requests after reading (default: false)" }, }, }, }, { name: "hover", description: "Simulate mouse hover over an element to test hover effects, tooltips, dropdown menus, and interactive states. Essential for testing CSS :hover pseudo-classes.", inputSchema: { type: "object", properties: { selector: { type: "string", description: "CSS selector for element to hover" }, }, required: ["selector"], }, }, { name: "setStyles", description: "Apply inline CSS styles to an element for live editing and prototyping. Perfect for testing design changes without modifying source code.", inputSchema: { type: "object", properties: { selector: { type: "string", description: "CSS selector for element to modify" }, styles: { type: "array", items: { type: "object", properties: { name: { type: "string", description: "CSS property name" }, value: { type: "string", description: "CSS property value" }, }, required: ["name", "value"], }, description: "Array of CSS property name-value pairs", }, }, required: ["selector", "styles"], }, }, { name: "setViewport", description: "Change viewport dimensions for responsive design testing. Test how your layout adapts to different screen sizes, mobile devices, tablets, and desktop resolutions.", inputSchema: { type: "object", properties: { width: { type: "number", minimum: 320, maximum: 4000, description: "Viewport width in pixels" }, height: { type: "number", minimum: 200, maximum: 3000, description: "Viewport height in pixels" }, deviceScaleFactor: { type: "number", minimum: 0.5, maximum: 3, description: "Device pixel ratio (default: 1)" }, }, required: ["width", "height"], }, }, { name: "getViewport", description: "Get current viewport size and device pixel ratio. Essential for responsive design testing and understanding how content fits on different screen sizes.", inputSchema: { type: "object", properties: {}, }, }, { name: "navigateTo", description: "Navigate the current page to a new URL. Use this when you need to move to a different page while keeping the same browser instance. Page will be reused if already open.", inputSchema: { type: "object", properties: { url: { type: "string", description: "URL to navigate to" }, waitUntil: { type: "string", enum: ["load", "domcontentloaded", "networkidle0", "networkidle2"], description: "Wait until event (default: networkidle2)" }, }, required: ["url"], }, }, { name: "getFigmaFrame", description: "Export and download a Figma frame as PNG image for comparison. Requires Figma API token and file/node IDs from Figma URLs.", inputSchema: { type: "object", properties: { figmaToken: { type: "string", description: "Figma API token (optional if FIGMA_TOKEN env var is set)" }, fileKey: { type: "string", description: "Figma file key (from URL: figma.com/file/FILE_KEY/...)" }, nodeId: { type: "string", description: "Figma node ID (frame/component ID)" }, scale: { type: "number", minimum: 0.1, maximum: 4, description: "Export scale (0.1-4, default: 2)" }, format: { type: "string", enum: ["png", "jpg", "svg"], description: "Export format (default: png)" }, }, required: ["fileKey", "nodeId"], }, }, { name: "compareFigmaToElement", description: "Compare Figma design directly with browser implementation. The GOLD STANDARD for design-to-code validation. Fetches Figma frame, screenshots element, performs pixel-perfect comparison with difference analysis.", inputSchema: { type: "object", properties: { figmaToken: { type: "string", description: "Figma API token (optional if FIGMA_TOKEN env var is set)" }, fileKey: { type: "string", description: "Figma file key" }, nodeId: { type: "string", description: "Figma frame/component ID" }, selector: { type: "string", description: "CSS selector for page element" }, threshold: { type: "number", minimum: 0, maximum: 1, description: "Difference threshold (0-1, default: 0.05)" }, figmaScale: { type: "number", minimum: 0.1, maximum: 4, description: "Figma export scale (default: 2)" }, }, required: ["fileKey", "nodeId", "selector"], }, }, { name: "getFigmaSpecs", description: "Extract detailed design specifications from Figma including colors, fonts, dimensions, and spacing. Perfect for design-to-code comparison.", inputSchema: { type: "object", properties: { figmaToken: { type: "string", description: "Figma API token (optional if FIGMA_TOKEN env var is set)" }, fileKey: { type: "string", description: "Figma file key" }, nodeId: { type: "string", description: "Figma frame/component ID" }, }, required: ["fileKey", "nodeId"], }, }, { name: "smartFindElement", description: "AI-powered element finder that uses natural language to locate elements. Returns multiple candidates ranked by relevance. Can optionally perform actions (click, type, etc.) on the best match immediately.", inputSchema: { type: "object", properties: { description: { type: "string", description: "Natural language description (e.g., 'login button', 'email input', 'submit form')" }, maxResults: { type: "number", minimum: 1, maximum: 20, description: "Max candidates to return (default: 5)" }, action: { type: "object", properties: { type: { type: "string", enum: ["click", "type", "scrollTo", "screenshot", "hover", "setStyles"], description: "Action to perform on best match" }, text: { type: "string", description: "Text to type (for 'type' action)" }, styles: { type: "array", items: { type: "object", properties: { name: { type: "string" }, value: { type: "string" } } }, description: "Styles to apply (for 'setStyles' action)" }, screenshot: { type: "boolean", description: "Capture screenshot after action (default: false)" }, waitAfter: { type: "number", description: "Wait time in ms after action" }, }, required: ["type"], description: "Optional action to perform on the best matching element", }, }, required: ["description"], }, }, { name: "analyzePage", description: "Comprehensive page analysis that returns complete structure: all forms, inputs, buttons, links, and interactive elements with their selectors. Cached for fast repeated access. Use this ONCE at page load to understand the entire page structure.", inputSchema: { type: "object", properties: { refresh: { type: "boolean", description: "Force refresh cached analysis (default: false)" }, }, }, }, { name: "getAllInteractiveElements", description: "Get all clickable and interactive elements on the page with their selectors and descriptions. Perfect for understanding what actions are available.", inputSchema: { type: "object", properties: { includeHidden: { type: "boolean", description: "Include hidden elements (default: false)" }, }, }, }, { name: "findElementsByText", description: "Find all elements containing specific text. Returns elements with their selectors. Can optionally perform actions (click, type, etc.) on the first match immediately.", inputSchema: { type: "object", properties: { text: { type: "string", description: "Text to search for" }, exact: { type: "boolean", description: "Exact match only (default: false)" }, caseSensitive: { type: "boolean", description: "Case sensitive (default: false)" }, action: { type: "object", properties: { type: { type: "string", enum: ["click", "type", "scrollTo", "screenshot", "hover", "setStyles"], description: "Action to perform on first match" }, text: { type: "string", description: "Text to type (for 'type' action)" }, styles: { type: "array", items: { type: "object", properties: { name: { type: "string" }, value: { type: "string" } } }, description: "Styles to apply (for 'setStyles' action)" }, screenshot: { type: "boolean", description: "Capture screenshot after action (default: false)" }, waitAfter: { type: "number", description: "Wait time in ms after action" }, }, required: ["type"], description: "Optional action to perform on the first matching element", }, }, required: ["text"], }, }, { name: "enableRecorder", description: "Inject recorder UI widget into the current page. Enables visual recording of user interactions with start/stop/save controls.", inputSchema: { type: "object", properties: {}, }, }, { name: "executeScenario", description: "Execute a recorded scenario by name with optional parameters. Runs all actions in the scenario chain with dependency resolution.", inputSchema: { type: "object", properties: { name: { type: "string", description: "Scenario name to execute" }, parameters: { type: "object", description: "Parameters for scenario execution (e.g., { email: 'user@test.com', password: 'secret' })" }, executeDependencies: { type: "boolean", description: "Execute dependencies before running scenario (default: true)" }, }, required: ["name"], }, }, { name: "listScenarios", description: "Get list of all available scenarios with metadata (name, description, tags, dependencies, timestamps).", inputSchema: { type: "object", properties: {}, }, }, { name: "searchScenarios", description: "Search scenarios by text query or tags. Returns matching scenarios with metadata.", inputSchema: { type: "object", properties: { text: { type: "string", description: "Text to search in name/description" }, tags: { type: "array", items: { type: "string" }, description: "Tags to filter by" }, }, }, }, { name: "getScenarioInfo", description: "Get detailed information about a specific scenario including actions, parameters, and dependencies.", inputSchema: { type: "object", properties: { name: { type: "string", description: "Scenario name" }, includeSecrets: { type: "boolean