UNPKG

@origindot./zigzag

Version:

Comprehensive MCP server providing AI agents with visual, development, and design validation tools

312 lines 14.2 kB
import { execFile } from 'child_process';
import { promisify } from 'util';
import * as os from 'os';

// execFile (not exec) is used on purpose: arguments are handed to the target
// program as an argv array, so no shell ever parses caller-supplied text.
const execFileAsync = promisify(execFile);

/** Win32 `mouse_event` flag pairs (button down, button up) per mouse button. */
const WINDOWS_BUTTON_FLAGS = new Map([
    ['left', ['0x0002', '0x0004']],
    ['right', ['0x0008', '0x0010']],
    ['middle', ['0x0020', '0x0040']],
]);
const WINDOWS_WHEEL = '0x0800';
const WINDOWS_HWHEEL = '0x1000';
/** One wheel "notch" as defined by Win32 (WHEEL_DELTA). */
const WINDOWS_WHEEL_DELTA = 120;

/** PowerShell statements that load WinForms and expose user32 `mouse_event` as `$mouse`. */
const WINDOWS_MOUSE_PRELUDE = [
    'Add-Type -AssemblyName System.Windows.Forms',
    'Add-Type -AssemblyName System.Drawing',
    "$mouse = Add-Type -MemberDefinition '[DllImport(\"user32.dll\")] public static extern void mouse_event(uint dwFlags, int dx, int dy, int dwData, System.IntPtr dwExtraInfo);' -Name NativeMouse -Namespace ScreenControl -PassThru",
];

/** Characters that SendKeys treats as syntax and that must be wrapped in braces to be typed literally. */
const SENDKEYS_SPECIAL_CHARS = /[+^%~(){}[\]]/g;
const SENDKEYS_SINGLE_SPECIAL = /^[+^%~(){}[\]]$/;

/**
 * Builds a lowercase-name -> platform-key lookup table and adds F1..F12.
 * A Map is used so that names such as "constructor" never hit Object.prototype.
 */
function buildKeyTable(entries, formatFunctionKey) {
    const table = new Map(Object.entries(entries));
    for (let n = 1; n <= 12; n += 1) {
        table.set(`f${n}`, formatFunctionKey(n));
    }
    return table;
}

const LINUX_KEYS = buildKeyTable({
    ctrl: 'ctrl', control: 'ctrl', alt: 'alt', option: 'alt', shift: 'shift',
    cmd: 'ctrl', command: 'ctrl', // Map cmd to ctrl on Linux
    enter: 'Return', return: 'Return', esc: 'Escape', escape: 'Escape',
    del: 'Delete', delete: 'Delete', backspace: 'BackSpace', space: 'space', tab: 'Tab',
    home: 'Home', end: 'End', pageup: 'Page_Up', pagedown: 'Page_Down',
    up: 'Up', down: 'Down', left: 'Left', right: 'Right',
    '+': 'plus', '-': 'minus',
}, (n) => `F${n}`);

const MAC_KEYS = buildKeyTable({
    ctrl: 'cmd', control: 'cmd', // ctrl shortcuts are mapped to their cmd equivalents on macOS
    cmd: 'cmd', command: 'cmd', alt: 'alt', option: 'alt', shift: 'shift',
    enter: 'return', return: 'return', esc: 'esc', escape: 'esc',
    del: 'delete', delete: 'delete', backspace: 'delete', space: 'space', tab: 'tab',
    home: 'home', end: 'end', pageup: 'page-up', pagedown: 'page-down',
    up: 'arrow-up', down: 'arrow-down', left: 'arrow-left', right: 'arrow-right',
}, (n) => `f${n}`);

const WINDOWS_KEYS = buildKeyTable({
    ctrl: '^', control: '^', alt: '%', option: '%', shift: '+',
    cmd: '^', command: '^', // Map cmd to ctrl on Windows
    enter: '{ENTER}', return: '{ENTER}', esc: '{ESC}', escape: '{ESC}',
    del: '{DELETE}', delete: '{DELETE}', backspace: '{BACKSPACE}', space: ' ', tab: '{TAB}',
    home: '{HOME}', end: '{END}', pageup: '{PGUP}', pagedown: '{PGDN}',
    up: '{UP}', down: '{DOWN}', left: '{LEFT}', right: '{RIGHT}',
}, (n) => `{F${n}}`);

/**
 * Validates a screen coordinate and returns it rounded to a whole pixel.
 * @param {unknown} value - Candidate coordinate.
 * @param {string} name - Argument name used in the error message.
 * @returns {number} The coordinate as an integer.
 * @throws {Error} If the value is not a finite number.
 */
function toPixel(value, name) {
    if (typeof value !== 'number' || !Number.isFinite(value)) {
        throw new Error(`${name} must be a finite number`);
    }
    return Math.round(value);
}

/**
 * Splits a combination such as "ctrl+shift+s" into its tokens.
 * A trailing "+" is treated as the plus key itself ("ctrl++" -> ["ctrl", "+"]).
 * @param {string} key - Key or key combination.
 * @returns {string[]} Tokens in the order given.
 * @throws {Error} If the key is empty or contains an empty token.
 */
function splitKeyCombo(key) {
    if (typeof key !== 'string' || key.length === 0) {
        throw new Error('key must be a non-empty string');
    }
    const tokens = key.split('+');
    if (key === '+' || key.endsWith('++')) {
        tokens.splice(-2, 2, '+');
    }
    if (tokens.some((token) => token === '')) {
        throw new Error(`Invalid key combination: ${key}`);
    }
    return tokens;
}

/**
 * Translates every token of a combination through a platform table.
 * Whole tokens are matched (case-insensitively) so that "escape" or "delete"
 * are never partially rewritten; unknown tokens are passed to `fallback`.
 */
function translateKey(key, table, fallback = (token) => token) {
    return splitKeyCombo(key).map((token) => table.get(token.toLowerCase()) ?? fallback(token));
}

/**
 * Builds the argv for running a PowerShell script without any quoting layer:
 * the script is passed as base64-encoded UTF-16LE via -EncodedCommand.
 * @param {string[]} statements - PowerShell statements, one per entry.
 * @returns {string[]} Arguments for the `powershell` executable.
 */
function powerShellArgs(statements) {
    const encoded = Buffer.from(statements.join('\n'), 'utf16le').toString('base64');
    return ['-NoProfile', '-EncodedCommand', encoded];
}

/** PowerShell statement that moves the cursor to the given pixel. */
function windowsMoveTo(x, y) {
    return `[System.Windows.Forms.Cursor]::Position = New-Object System.Drawing.Point(${x}, ${y})`;
}

/** PowerShell statement that raises one `mouse_event` at the current cursor position. */
function windowsMouseEvent(flags, data = 0) {
    return `$mouse::mouse_event(${flags}, 0, 0, ${data}, [System.IntPtr]::Zero)`;
}

/**
 * PowerShell statements that send a SendKeys sequence. The sequence is embedded
 * as base64 so that no character in it can terminate the PowerShell string.
 */
function windowsSendKeys(sequence) {
    const payload = Buffer.from(sequence, 'utf8').toString('base64');
    return [
        'Add-Type -AssemblyName System.Windows.Forms',
        `$keys = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String('${payload}'))`,
        '[System.Windows.Forms.SendKeys]::SendWait($keys)',
    ];
}

/**
 * MCP tool that drives the mouse and keyboard of the local desktop.
 *
 * Linux uses `xdotool`, macOS uses `cliclick`, Windows uses PowerShell with
 * WinForms (cursor, SendKeys) and user32 `mouse_event` (buttons, wheel).
 */
export class ScreenControlTool {
    definition = {
        name: 'screen_control',
        description: 'Control screen interactions: click, type, scroll, and keyboard shortcuts',
        inputSchema: {
            type: 'object',
            properties: {
                action: {
                    type: 'string',
                    enum: ['click', 'double_click', 'right_click', 'type', 'key', 'scroll', 'drag', 'move_mouse'],
                    description: 'Action to perform'
                },
                x: { type: 'number', description: 'X coordinate (for click, drag, move actions)' },
                y: { type: 'number', description: 'Y coordinate (for click, drag, move actions)' },
                text: { type: 'string', description: 'Text to type (for type action)' },
                key: { type: 'string', description: 'Key or key combination (e.g., "ctrl+c", "enter", "tab")' },
                scroll_direction: {
                    type: 'string',
                    enum: ['up', 'down', 'left', 'right'],
                    description: 'Scroll direction'
                },
                scroll_amount: { type: 'number', default: 3, description: 'Number of scroll units' },
                end_x: { type: 'number', description: 'End X coordinate (for drag action)' },
                end_y: { type: 'number', description: 'End Y coordinate (for drag action)' },
                button: {
                    type: 'string',
                    enum: ['left', 'right', 'middle'],
                    default: 'left',
                    description: 'Mouse button for click actions'
                },
                delay: { type: 'number', default: 100, description: 'Delay after action in milliseconds' }
            },
            required: ['action']
        }
    };

    /**
     * Runs an external program with an argv array (no shell involved).
     * Exposed as a field so tests or embedders can substitute their own runner.
     * @param {string} file - Executable name.
     * @param {string[]} args - Arguments, passed verbatim.
     * @returns {Promise<{stdout: string, stderr: string}>}
     */
    runCommand = (file, args) => execFileAsync(file, args);

    /**
     * Dispatches one tool call.
     * @param {object} args - Arguments matching `definition.inputSchema`.
     * @returns {Promise<{content: Array<{type: string, text: string}>, isError: boolean}>}
     *   Never rejects: failures are reported as a result with `isError: true`.
     */
    async execute(args) {
        try {
            const platform = os.platform();
            let result;
            switch (args.action) {
                case 'click':
                    result = await this.performClick(platform, args.x, args.y, args.button || 'left');
                    break;
                case 'double_click':
                    result = await this.performDoubleClick(platform, args.x, args.y);
                    break;
                case 'right_click':
                    result = await this.performClick(platform, args.x, args.y, 'right');
                    break;
                case 'type':
                    if (!args.text)
                        throw new Error('text is required for type action');
                    result = await this.performType(platform, args.text);
                    break;
                case 'key':
                    if (!args.key)
                        throw new Error('key is required for key action');
                    result = await this.performKey(platform, args.key);
                    break;
                case 'scroll':
                    // ?? rather than || so an explicit amount of 0 is respected.
                    result = await this.performScroll(platform, args.scroll_direction || 'down', args.scroll_amount ?? 3, args.x, args.y);
                    break;
                case 'drag':
                    // Presence check only: 0 is a valid coordinate and must not be rejected.
                    if ([args.x, args.y, args.end_x, args.end_y].some((value) => value == null)) {
                        throw new Error('x, y, end_x, end_y are required for drag action');
                    }
                    result = await this.performDrag(platform, args.x, args.y, args.end_x, args.end_y);
                    break;
                case 'move_mouse':
                    if (args.x == null || args.y == null)
                        throw new Error('x, y are required for move_mouse action');
                    result = await this.performMouseMove(platform, args.x, args.y);
                    break;
                default:
                    throw new Error(`Unknown action: ${args.action}`);
            }
            // Add delay if specified
            if (args.delay && args.delay > 0) {
                await new Promise(resolve => setTimeout(resolve, args.delay));
            }
            return {
                content: [{ type: 'text', text: result }],
                isError: false
            };
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : 'Unknown error';
            return {
                content: [{ type: 'text', text: `Screen control failed: ${errorMessage}` }],
                isError: true
            };
        }
    }

    /**
     * Moves the pointer to (x, y) and clicks once.
     * @param {string} platform - Value of `os.platform()`.
     * @param {number} x
     * @param {number} y
     * @param {string} button - 'left', 'right' or 'middle'; anything else acts as 'left'.
     * @returns {Promise<string>} Human-readable confirmation.
     * @throws {Error} On invalid coordinates or an unsupported platform.
     */
    async performClick(platform, x, y, button) {
        const px = toPixel(x, 'x');
        const py = toPixel(y, 'y');
        switch (platform) {
            case 'linux':
                // Using xdotool for Linux
                await this.runCommand('xdotool', ['mousemove', String(px), String(py), 'click', String(this.getLinuxButton(button))]);
                return `Clicked ${button} button at (${px}, ${py})`;
            case 'darwin': {
                // Using cliclick for macOS
                // NOTE(review): confirm that the installed cliclick supports "mc:" (middle click).
                const macButton = button === 'right' ? 'rc' : button === 'middle' ? 'mc' : 'c';
                await this.runCommand('cliclick', [`${macButton}:${px},${py}`]);
                return `Clicked ${button} button at (${px}, ${py})`;
            }
            case 'win32': {
                // Position the cursor, then raise real button-down/up events.
                const [down, up] = WINDOWS_BUTTON_FLAGS.get(button) ?? WINDOWS_BUTTON_FLAGS.get('left');
                await this.runCommand('powershell', powerShellArgs([
                    ...WINDOWS_MOUSE_PRELUDE,
                    windowsMoveTo(px, py),
                    'Start-Sleep -Milliseconds 50',
                    windowsMouseEvent(down),
                    windowsMouseEvent(up),
                ]));
                return `Clicked ${button} button at (${px}, ${py})`;
            }
            default:
                throw new Error(`Platform ${platform} not supported for screen control`);
        }
    }

    /**
     * Moves the pointer to (x, y) and double-clicks the left button.
     * @returns {Promise<string>} Human-readable confirmation.
     * @throws {Error} On invalid coordinates or an unsupported platform.
     */
    async performDoubleClick(platform, x, y) {
        const px = toPixel(x, 'x');
        const py = toPixel(y, 'y');
        switch (platform) {
            case 'linux':
                await this.runCommand('xdotool', ['mousemove', String(px), String(py), 'click', '--repeat', '2', '1']);
                return `Double-clicked at (${px}, ${py})`;
            case 'darwin':
                await this.runCommand('cliclick', [`dc:${px},${py}`]);
                return `Double-clicked at (${px}, ${py})`;
            case 'win32': {
                const [down, up] = WINDOWS_BUTTON_FLAGS.get('left');
                await this.runCommand('powershell', powerShellArgs([
                    ...WINDOWS_MOUSE_PRELUDE,
                    windowsMoveTo(px, py),
                    'Start-Sleep -Milliseconds 50',
                    windowsMouseEvent(down),
                    windowsMouseEvent(up),
                    windowsMouseEvent(down),
                    windowsMouseEvent(up),
                ]));
                return `Double-clicked at (${px}, ${py})`;
            }
            default:
                throw new Error(`Platform ${platform} not supported for double-click`);
        }
    }

    /**
     * Types literal text into the focused window.
     * @param {string} platform - Value of `os.platform()`.
     * @param {string} text - Text to type; never interpreted by a shell.
     * @returns {Promise<string>} Human-readable confirmation.
     * @throws {Error} If text is not a string or the platform is unsupported.
     */
    async performType(platform, text) {
        if (typeof text !== 'string') {
            throw new Error('text must be a string');
        }
        switch (platform) {
            case 'linux':
                // "--" keeps text that starts with "-" from being read as an option.
                await this.runCommand('xdotool', ['type', '--', text]);
                return `Typed: "${text}"`;
            case 'darwin':
                await this.runCommand('cliclick', [`t:${text}`]);
                return `Typed: "${text}"`;
            case 'win32': {
                // SendKeys gives + ^ % ~ ( ) { } [ ] special meaning; brace them to type them literally.
                const literal = text.replace(SENDKEYS_SPECIAL_CHARS, '{$&}');
                await this.runCommand('powershell', powerShellArgs(windowsSendKeys(literal)));
                return `Typed: "${text}"`;
            }
            default:
                throw new Error(`Platform ${platform} not supported for typing`);
        }
    }

    /**
     * Presses a key or key combination such as "ctrl+c" or "enter".
     * @returns {Promise<string>} Human-readable confirmation.
     * @throws {Error} On a malformed combination or an unsupported platform.
     */
    async performKey(platform, key) {
        switch (platform) {
            case 'linux': {
                const linuxKey = this.convertToLinuxKey(key);
                await this.runCommand('xdotool', ['key', linuxKey]);
                return `Pressed key: ${key}`;
            }
            case 'darwin': {
                // cliclick has no combo syntax: hold modifiers with kd:, press the
                // key (kp: for named keys, t: for a single character), release with ku:.
                const tokens = splitKeyCombo(this.convertToMacKey(key));
                const mainKey = tokens[tokens.length - 1];
                const held = tokens.slice(0, -1);
                const press = [...mainKey].length === 1 ? `t:${mainKey}` : `kp:${mainKey}`;
                const commands = held.length > 0
                    ? [`kd:${held.join(',')}`, press, `ku:${held.join(',')}`]
                    : [press];
                await this.runCommand('cliclick', commands);
                return `Pressed key: ${key}`;
            }
            case 'win32': {
                const winKey = this.convertToWindowsKey(key);
                await this.runCommand('powershell', powerShellArgs(windowsSendKeys(winKey)));
                return `Pressed key: ${key}`;
            }
            default:
                throw new Error(`Platform ${platform} not supported for key press`);
        }
    }

    /**
     * Scrolls, optionally after moving the pointer to (x, y).
     * @param {string} platform - Value of `os.platform()`.
     * @param {string} direction - 'up', 'down', 'left'; any other value scrolls right.
     * @param {number} amount - Number of scroll units (non-negative).
     * @param {number} [x] - Optional pointer position; used only when both x and y are given.
     * @param {number} [y]
     * @returns {Promise<string>} Human-readable confirmation.
     * @throws {Error} On invalid arguments or an unsupported platform.
     */
    async performScroll(platform, direction, amount, x, y) {
        if (typeof amount !== 'number' || !Number.isFinite(amount) || amount < 0) {
            throw new Error('scroll_amount must be a non-negative number');
        }
        // Null check instead of truthiness so that coordinate 0 counts as "given".
        const hasTarget = x != null && y != null;
        const px = hasTarget ? toPixel(x, 'x') : undefined;
        const py = hasTarget ? toPixel(y, 'y') : undefined;
        const where = hasTarget ? ` at (${px}, ${py})` : '';
        switch (platform) {
            case 'linux': {
                // X11 exposes the wheel as buttons 4 (up), 5 (down), 6 (left), 7 (right).
                const scrollButton = direction === 'up' ? 4 : direction === 'down' ? 5 : direction === 'left' ? 6 : 7;
                const move = hasTarget ? ['mousemove', String(px), String(py)] : [];
                for (let i = 0; i < amount; i++) {
                    await this.runCommand('xdotool', [...move, 'click', String(scrollButton)]);
                }
                return `Scrolled ${direction} ${amount} times${where}`;
            }
            case 'darwin': {
                // NOTE(review): confirm that the installed cliclick supports an "s:" scroll
                // command; the invocation is kept exactly as the original issued it.
                const scrollDir = direction === 'up' ? '+1' : direction === 'down' ? '-1' : direction === 'left' ? '+1,0' : '-1,0';
                const move = hasTarget ? [`m:${px},${py}`] : [];
                await this.runCommand('cliclick', [...move, `s:${scrollDir}`]);
                return `Scrolled ${direction}${where}`;
            }
            case 'win32': {
                // Positive wheel data scrolls up / right, negative scrolls down / left.
                const horizontal = direction !== 'up' && direction !== 'down';
                const sign = direction === 'up' || (horizontal && direction !== 'left') ? 1 : -1;
                const delta = sign * WINDOWS_WHEEL_DELTA * Math.round(amount);
                await this.runCommand('powershell', powerShellArgs([
                    ...WINDOWS_MOUSE_PRELUDE,
                    ...(hasTarget ? [windowsMoveTo(px, py), 'Start-Sleep -Milliseconds 50'] : []),
                    windowsMouseEvent(horizontal ? WINDOWS_HWHEEL : WINDOWS_WHEEL, delta),
                ]));
                return `Scrolled ${direction}${where}`;
            }
            default:
                throw new Error(`Platform ${platform} not supported for scrolling`);
        }
    }

    /**
     * Drags with the left button from (startX, startY) to (endX, endY).
     * @returns {Promise<string>} Human-readable confirmation.
     * @throws {Error} On invalid coordinates or an unsupported platform.
     */
    async performDrag(platform, startX, startY, endX, endY) {
        const sx = toPixel(startX, 'x');
        const sy = toPixel(startY, 'y');
        const ex = toPixel(endX, 'end_x');
        const ey = toPixel(endY, 'end_y');
        switch (platform) {
            case 'linux':
                await this.runCommand('xdotool', [
                    'mousemove', String(sx), String(sy), 'mousedown', '1',
                    'mousemove', String(ex), String(ey), 'mouseup', '1',
                ]);
                return `Dragged from (${sx}, ${sy}) to (${ex}, ${ey})`;
            case 'darwin':
                await this.runCommand('cliclick', [`m:${sx},${sy}`, `dd:${sx},${sy}`, `du:${ex},${ey}`]);
                return `Dragged from (${sx}, ${sy}) to (${ex}, ${ey})`;
            case 'win32': {
                // Hold the left button while the cursor travels so the target sees a drag.
                const [down, up] = WINDOWS_BUTTON_FLAGS.get('left');
                await this.runCommand('powershell', powerShellArgs([
                    ...WINDOWS_MOUSE_PRELUDE,
                    windowsMoveTo(sx, sy),
                    'Start-Sleep -Milliseconds 50',
                    windowsMouseEvent(down),
                    'Start-Sleep -Milliseconds 100',
                    windowsMoveTo(ex, ey),
                    'Start-Sleep -Milliseconds 50',
                    windowsMouseEvent(up),
                ]));
                return `Dragged from (${sx}, ${sy}) to (${ex}, ${ey})`;
            }
            default:
                throw new Error(`Platform ${platform} not supported for dragging`);
        }
    }

    /**
     * Moves the pointer to (x, y) without clicking.
     * @returns {Promise<string>} Human-readable confirmation.
     * @throws {Error} On invalid coordinates or an unsupported platform.
     */
    async performMouseMove(platform, x, y) {
        const px = toPixel(x, 'x');
        const py = toPixel(y, 'y');
        switch (platform) {
            case 'linux':
                await this.runCommand('xdotool', ['mousemove', String(px), String(py)]);
                return `Moved mouse to (${px}, ${py})`;
            case 'darwin':
                await this.runCommand('cliclick', [`m:${px},${py}`]);
                return `Moved mouse to (${px}, ${py})`;
            case 'win32':
                await this.runCommand('powershell', powerShellArgs([
                    'Add-Type -AssemblyName System.Windows.Forms',
                    'Add-Type -AssemblyName System.Drawing',
                    windowsMoveTo(px, py),
                ]));
                return `Moved mouse to (${px}, ${py})`;
            default:
                throw new Error(`Platform ${platform} not supported for mouse movement`);
        }
    }

    /**
     * Maps a button name to its xdotool button number.
     * @param {string} button - 'left', 'middle' or 'right'.
     * @returns {number} 1, 2 or 3; unknown names fall back to 1 (left).
     */
    getLinuxButton(button) {
        switch (button) {
            case 'left': return 1;
            case 'middle': return 2;
            case 'right': return 3;
            default: return 1;
        }
    }

    /**
     * Converts a key combination to xdotool format, e.g. "cmd+enter" -> "ctrl+Return".
     * Unknown tokens are passed through unchanged.
     * @param {string} key
     * @returns {string}
     */
    convertToLinuxKey(key) {
        return translateKey(key, LINUX_KEYS).join('+');
    }

    /**
     * Converts a key combination to cliclick key names joined by "+",
     * e.g. "ctrl+escape" -> "cmd+esc". Unknown tokens are passed through unchanged.
     * @param {string} key
     * @returns {string}
     */
    convertToMacKey(key) {
        return translateKey(key, MAC_KEYS).join('+');
    }

    /**
     * Converts a key combination to Windows SendKeys format,
     * e.g. "ctrl+shift+delete" -> "^+{DELETE}".
     * Multi-character unknown tokens are passed through unchanged so callers may
     * still supply raw SendKeys codes; a single syntax character is braced.
     * @param {string} key
     * @returns {string}
     */
    convertToWindowsKey(key) {
        const braceIfSpecial = (token) => (SENDKEYS_SINGLE_SPECIAL.test(token) ? `{${token}}` : token);
        return translateKey(key, WINDOWS_KEYS, braceIfSpecial).join('');
    }
}
//# sourceMappingURL=screen-control.js.map