UNPKG

@onkernel/cu-playwright

Version:

Computer Use x Playwright SDK

249 lines (213 loc) 7.82 kB
import type { Page } from 'playwright';
import { Action, ToolError } from './types/computer';
import type { ActionParams, BaseAnthropicTool, ToolResult } from './types/computer';
import { KeyboardUtils } from './utils/keyboard';
import { ActionValidator } from './utils/validator';

/** Per-keystroke delay (ms) handed to Playwright's `keyboard.type`. */
const TYPING_DELAY_MS = 12;

/** Scroll amount used when the validated amount is falsy (0 or NaN). */
const DEFAULT_SCROLL_AMOUNT = 10;

/** The viewport is split into this many partitions; one scroll unit moves one partition. */
const PAGE_PARTITIONS = 25;

/** Resolves after `ms` milliseconds. */
const sleep = (ms: number): Promise<void> =>
  new Promise(resolve => setTimeout(resolve, ms));

/**
 * Executes "computer use" actions (mouse, keyboard, scroll, wait, screenshot)
 * against a Playwright {@link Page}.
 *
 * Most actions finish by capturing a screenshot and returning it as a
 * base64-encoded PNG; `CURSOR_POSITION` returns a text `output` instead.
 */
export class ComputerTool implements BaseAnthropicTool {
  name: 'computer' = 'computer';
  protected page: Page;
  /** Seconds to wait before every screenshot is captured. */
  protected _screenshotDelay = 2.0;
  protected version: '20241022' | '20250124';

  /** Actions routed to {@link handleMouseAction}; all require a `coordinate`. */
  private readonly mouseActions = new Set([
    Action.LEFT_CLICK,
    Action.RIGHT_CLICK,
    Action.MIDDLE_CLICK,
    Action.DOUBLE_CLICK,
    Action.TRIPLE_CLICK,
    Action.MOUSE_MOVE,
    Action.LEFT_CLICK_DRAG,
    Action.LEFT_MOUSE_DOWN,
    Action.LEFT_MOUSE_UP,
  ]);

  /** Actions routed to {@link handleKeyboardAction}; all require `text`. */
  private readonly keyboardActions = new Set([
    Action.KEY,
    Action.TYPE,
    Action.HOLD_KEY,
  ]);

  /** Actions handled inline by {@link call}. */
  private readonly systemActions = new Set([
    Action.SCREENSHOT,
    Action.CURSOR_POSITION,
    Action.SCROLL,
    Action.WAIT,
  ]);

  /**
   * @param page - Playwright page that every action is executed against.
   * @param version - Tool version; `SCROLL` and `WAIT` are rejected unless it is `'20250124'`.
   */
  constructor(page: Page, version: '20241022' | '20250124' = '20250124') {
    this.page = page;
    this.version = version;
  }

  /** Tool type identifier derived from the configured version. */
  get apiType(): 'computer_20241022' | 'computer_20250124' {
    return this.version === '20241022' ? 'computer_20241022' : 'computer_20250124';
  }

  /** Builds the tool description (name, type and a fixed 1280x720 display). */
  toParams(): ActionParams {
    const params = {
      name: this.name,
      type: this.apiType,
      display_width_px: 1280,
      display_height_px: 720,
      display_number: null,
    };
    return params;
  }

  /**
   * Maps a click-style action to the Playwright mouse button it uses.
   *
   * @throws ToolError when `action` has no associated button.
   */
  private getMouseButton(action: Action): 'left' | 'right' | 'middle' {
    switch (action) {
      case Action.LEFT_CLICK:
      case Action.DOUBLE_CLICK:
      case Action.TRIPLE_CLICK:
      case Action.LEFT_CLICK_DRAG:
      case Action.LEFT_MOUSE_DOWN:
      case Action.LEFT_MOUSE_UP:
        return 'left';
      case Action.RIGHT_CLICK:
        return 'right';
      case Action.MIDDLE_CLICK:
        return 'middle';
      default:
        throw new ToolError(`Invalid mouse action: ${action}`);
    }
  }

  /**
   * Moves the pointer to `coordinate`, performs the requested mouse action
   * there, and returns a screenshot of the result.
   *
   * @param action - One of the actions in `mouseActions`.
   * @param coordinate - Target `[x, y]`, validated by `ActionValidator`.
   * @throws ToolError when the action has no associated mouse button.
   */
  private async handleMouseAction(action: Action, coordinate: [number, number]): Promise<ToolResult> {
    const [x, y] = ActionValidator.validateAndGetCoordinates(coordinate);
    await this.page.mouse.move(x, y);
    await this.page.waitForTimeout(100);

    switch (action) {
      case Action.MOUSE_MOVE:
        // The pointer is already in place. MOUSE_MOVE has no button, so it
        // must not reach getMouseButton(), which would reject it.
        break;
      case Action.LEFT_MOUSE_DOWN:
        await this.page.mouse.down();
        break;
      case Action.LEFT_MOUSE_UP:
        await this.page.mouse.up();
        break;
      case Action.DOUBLE_CLICK:
        await this.page.mouse.dblclick(x, y, { button: this.getMouseButton(action) });
        break;
      case Action.TRIPLE_CLICK:
        await this.page.mouse.click(x, y, { button: this.getMouseButton(action), clickCount: 3 });
        break;
      default:
        // NOTE(review): LEFT_CLICK_DRAG also lands here and only clicks at the
        // target; no drag start coordinate is available in this method.
        await this.page.mouse.click(x, y, { button: this.getMouseButton(action) });
        break;
    }

    await this.page.waitForTimeout(500);
    return await this.screenshot();
  }

  /**
   * Performs a keyboard action and returns a screenshot of the result.
   *
   * - `HOLD_KEY`: holds the key for `duration` seconds.
   * - `KEY`: presses every key of the combination in order, then releases
   *   them in reverse order.
   * - otherwise: types `text` with a fixed per-key delay.
   *
   * @param action - One of the actions in `keyboardActions`.
   * @param text - Key name, key combination, or literal text to type.
   * @param duration - Hold time in seconds (`HOLD_KEY` only); treated as 0 when omitted.
   */
  private async handleKeyboardAction(action: Action, text: string, duration?: number): Promise<ToolResult> {
    if (action === Action.HOLD_KEY) {
      const key = KeyboardUtils.getPlaywrightKey(text);
      await this.page.keyboard.down(key);
      // `?? 0` avoids a NaN timeout when duration is missing.
      await sleep((duration ?? 0) * 1000);
      await this.page.keyboard.up(key);
    } else if (action === Action.KEY) {
      const keys = KeyboardUtils.parseKeyCombination(text);
      for (const key of keys) {
        await this.page.keyboard.down(key);
      }
      // Reverse a copy so the array returned by the parser is not mutated.
      for (const key of [...keys].reverse()) {
        await this.page.keyboard.up(key);
      }
    } else {
      await this.page.keyboard.type(text, { delay: TYPING_DELAY_MS });
    }

    await this.page.waitForTimeout(500);
    return await this.screenshot();
  }

  /**
   * Waits `_screenshotDelay` seconds, then captures the page as a PNG.
   *
   * @returns The screenshot as a base64 string in `base64Image`.
   * @throws ToolError when the capture fails.
   */
  async screenshot(): Promise<ToolResult> {
    try {
      console.log('Starting screenshot...');
      await sleep(this._screenshotDelay * 1000);
      const screenshot = await this.page.screenshot({ type: 'png' });
      console.log('Screenshot taken, size:', screenshot.length, 'bytes');
      return {
        base64Image: screenshot.toString('base64'),
      };
    } catch (error) {
      throw new ToolError(`Failed to take screenshot: ${error}`);
    }
  }

  /**
   * Validates `params` and dispatches the requested action.
   *
   * @param params - Action request; accepts both `scrollAmount`/`scrollDirection`
   *   and `scroll_amount`/`scroll_direction` spellings.
   * @returns A screenshot result, or an `output` string for `CURSOR_POSITION`.
   * @throws ToolError for unknown actions, missing `coordinate`/`text`,
   *   invalid scroll arguments, version-gated actions on an older version,
   *   or when no cursor position can be determined.
   */
  async call(params: ActionParams): Promise<ToolResult> {
    const {
      action,
      text,
      coordinate,
      scrollDirection: scrollDirectionParam,
      scroll_amount,
      scrollAmount,
      duration,
      ...kwargs
    } = params;

    ActionValidator.validateActionParams(params, this.mouseActions, this.keyboardActions);

    if (action === Action.SCREENSHOT) {
      return await this.screenshot();
    }

    if (action === Action.CURSOR_POSITION) {
      // The position reported is the top-left corner of the first range of
      // the page's current selection.
      const position = await this.page.evaluate(() => {
        const selection = window.getSelection();
        // getRangeAt(0) throws when the selection has no ranges, so check
        // rangeCount first and report "no position" instead.
        if (!selection || selection.rangeCount === 0) {
          return null;
        }
        const rect = selection.getRangeAt(0).getBoundingClientRect();
        return { x: rect.x, y: rect.y };
      });
      if (!position) {
        throw new ToolError('Failed to get cursor position');
      }
      return { output: `X=${position.x},Y=${position.y}` };
    }

    if (action === Action.SCROLL) {
      if (this.version !== '20250124') {
        throw new ToolError(`${action} is only available in version 20250124`);
      }

      const scrollDirection = scrollDirectionParam || kwargs.scroll_direction;
      // `??` rather than `||` so an explicit scrollAmount of 0 is not discarded
      // and then reported as "undefined" by the validation below.
      const scrollAmountValue = scrollAmount ?? scroll_amount;

      if (!scrollDirection || !['up', 'down', 'left', 'right'].includes(scrollDirection)) {
        throw new ToolError(`Scroll direction "${scrollDirection}" must be 'up', 'down', 'left', or 'right'`);
      }
      if (typeof scrollAmountValue !== 'number' || scrollAmountValue < 0) {
        throw new ToolError(`Scroll amount "${scrollAmountValue}" must be a non-negative number`);
      }

      // Optionally position the pointer so the wheel event targets that spot.
      if (coordinate) {
        const [x, y] = ActionValidator.validateAndGetCoordinates(coordinate);
        await this.page.mouse.move(x, y);
        await this.page.waitForTimeout(100);
      }

      const pageDimensions = await this.page.evaluate(() => {
        return { h: window.innerHeight, w: window.innerWidth };
      });
      // Each unit scrolls 1/PAGE_PARTITIONS of the viewport; a falsy amount
      // (0 or NaN) falls back to the default.
      const scrollFactor = (scrollAmountValue || DEFAULT_SCROLL_AMOUNT) / PAGE_PARTITIONS;

      if (scrollDirection === 'down' || scrollDirection === 'up') {
        const amount = pageDimensions.h * scrollFactor;
        await this.page.mouse.wheel(0, scrollDirection === 'down' ? amount : -amount);
      } else {
        const amount = pageDimensions.w * scrollFactor;
        await this.page.mouse.wheel(scrollDirection === 'right' ? amount : -amount, 0);
      }

      await this.page.waitForTimeout(500);
      return await this.screenshot();
    }

    if (action === Action.WAIT) {
      if (this.version !== '20250124') {
        throw new ToolError(`${action} is only available in version 20250124`);
      }
      // `?? 0` avoids a NaN timeout when duration is missing.
      await sleep((duration ?? 0) * 1000);
      return await this.screenshot();
    }

    if (this.mouseActions.has(action)) {
      if (!coordinate) {
        throw new ToolError(`coordinate is required for ${action}`);
      }
      return await this.handleMouseAction(action, coordinate);
    }

    if (this.keyboardActions.has(action)) {
      if (!text) {
        throw new ToolError(`text is required for ${action}`);
      }
      return await this.handleKeyboardAction(action, text, duration);
    }

    throw new ToolError(`Invalid action: ${action}`);
  }
}

// For backward compatibility
/** {@link ComputerTool} pinned to version `20241022`. */
export class ComputerTool20241022 extends ComputerTool {
  constructor(page: Page) {
    super(page, '20241022');
  }
}

/** {@link ComputerTool} pinned to version `20250124`. */
export class ComputerTool20250124 extends ComputerTool {
  constructor(page: Page) {
    super(page, '20250124');
  }
}