UNPKG

@ui-tars/operator-nut-js

Version:
240 lines (239 loc) 11 kB
/** * Copyright (c) 2025 Bytedance, Inc. and its affiliates. * SPDX-License-Identifier: Apache-2.0 */ import { Operator, StatusEnum, parseBoxToScreenCoords, useContext } from "@ui-tars/sdk/core"; import { Jimp } from "jimp"; import { Button, Key, Point, Region, centerOf, clipboard, keyboard, mouse, screen, sleep, straightTo } from "@computer-use/nut-js"; import big from "big.js"; function _define_property(obj, key, value) { if (key in obj) Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); else obj[key] = value; return obj; } const moveStraightTo = async (startX, startY)=>{ if (null === startX || null === startY) return; await mouse.move(straightTo(new Point(startX, startY))); }; class NutJSOperator extends Operator { async screenshot() { const { logger } = useContext(); const grabImage = await screen.grab(); const screenWithScale = await grabImage.toRGB(); const scaleFactor = screenWithScale.pixelDensity.scaleX; logger.info('[NutjsOperator]', 'scaleX', screenWithScale.pixelDensity.scaleX, 'scaleY', screenWithScale.pixelDensity.scaleY); const screenWithScaleImage = await Jimp.fromBitmap({ width: screenWithScale.width, height: screenWithScale.height, data: Buffer.from(screenWithScale.data) }); const width = screenWithScale.width / screenWithScale.pixelDensity.scaleX; const height = screenWithScale.height / screenWithScale.pixelDensity.scaleY; const physicalScreenImage = await screenWithScaleImage.resize({ w: width, h: height }).getBuffer('image/png'); const output = { base64: physicalScreenImage.toString('base64'), scaleFactor }; null == logger || logger.info(`[NutjsOperator] screenshot: ${width}x${height}, scaleFactor: ${scaleFactor}`); return output; } async execute(params) { const { logger } = useContext(); const { parsedPrediction, screenWidth, screenHeight, scaleFactor } = params; const { action_type, action_inputs } = parsedPrediction; const startBoxStr = (null == action_inputs ? void 0 : action_inputs.start_box) || ''; logger.info('[NutjsOperator] execute', scaleFactor); const { x: startX, y: startY } = parseBoxToScreenCoords({ boxStr: startBoxStr, screenWidth, screenHeight }); logger.info(`[NutjsOperator Position]: (${startX}, ${startY})`); mouse.config.mouseSpeed = 3600; const getHotkeys = (keyStr)=>{ if (keyStr) { const platformCommandKey = 'darwin' === process.platform ? Key.LeftCmd : Key.LeftWin; const platformCtrlKey = 'darwin' === process.platform ? Key.LeftCmd : Key.LeftControl; const keyMap = { return: Key.Enter, ctrl: platformCtrlKey, shift: Key.LeftShift, alt: Key.LeftAlt, 'page down': Key.PageDown, 'page up': Key.PageUp, meta: platformCommandKey, win: platformCommandKey, command: platformCommandKey, cmd: platformCommandKey, ',': Key.Comma, arrowup: Key.Up, arrowdown: Key.Down, arrowleft: Key.Left, arrowright: Key.Right }; const lowercaseKeyMap = Object.fromEntries(Object.entries(Key).map(([k, v])=>[ k.toLowerCase(), v ])); const keys = keyStr.split(/[\s+]/).map((k)=>k.toLowerCase()).map((k)=>keyMap[k] ?? lowercaseKeyMap[k]).filter(Boolean); logger.info('[NutjsOperator] hotkey: ', keys); return keys; } logger.error('[NutjsOperator] hotkey error: ', `${keyStr} is not a valid key`); return []; }; switch(action_type){ case 'wait': logger.info('[NutjsOperator] wait', action_inputs); await sleep(5000); break; case 'mouse_move': case 'hover': logger.info('[NutjsOperator] mouse_move'); await moveStraightTo(startX, startY); break; case 'click': case 'left_click': case 'left_single': logger.info('[NutjsOperator] left_click'); await moveStraightTo(startX, startY); await sleep(100); await mouse.click(Button.LEFT); break; case 'left_double': case 'double_click': logger.info(`[NutjsOperator] ${action_type}(${startX}, ${startY})`); await moveStraightTo(startX, startY); await sleep(100); await mouse.doubleClick(Button.LEFT); break; case 'right_click': case 'right_single': logger.info('[NutjsOperator] right_click'); await moveStraightTo(startX, startY); await sleep(100); await mouse.click(Button.RIGHT); break; case 'middle_click': logger.info('[NutjsOperator] middle_click'); await moveStraightTo(startX, startY); await mouse.click(Button.MIDDLE); break; case 'left_click_drag': case 'drag': case 'select': logger.info('[NutjsOperator] drag', action_inputs); if (null == action_inputs ? void 0 : action_inputs.end_box) { const { x: endX, y: endY } = parseBoxToScreenCoords({ boxStr: action_inputs.end_box, screenWidth, screenHeight }); if (startX && startY && endX && endY) { const diffX = big(endX).minus(startX).toNumber(); const diffY = big(endY).minus(startY).toNumber(); await mouse.drag(straightTo(centerOf(new Region(startX, startY, diffX, diffY)))); } } break; case 'type': { var _action_inputs_content; const content = null == (_action_inputs_content = action_inputs.content) ? void 0 : _action_inputs_content.trim(); logger.info('[NutjsOperator] type', content); if (content) { const stripContent = content.replace(/\\n$/, '').replace(/\n$/, ''); keyboard.config.autoDelayMs = 0; if ('win32' === process.platform) { const originalClipboard = await clipboard.getContent(); await clipboard.setContent(stripContent); await keyboard.pressKey(Key.LeftControl, Key.V); await sleep(50); await keyboard.releaseKey(Key.LeftControl, Key.V); await sleep(50); await clipboard.setContent(originalClipboard); } else await keyboard.type(stripContent); if (content.endsWith('\n') || content.endsWith('\\n')) { await keyboard.pressKey(Key.Enter); await keyboard.releaseKey(Key.Enter); } keyboard.config.autoDelayMs = 500; } break; } case 'hotkey': { const keyStr = (null == action_inputs ? void 0 : action_inputs.key) || (null == action_inputs ? void 0 : action_inputs.hotkey); const keys = getHotkeys(keyStr); if (keys.length > 0) { await keyboard.pressKey(...keys); await keyboard.releaseKey(...keys); } break; } case 'press': { const keyStr = (null == action_inputs ? void 0 : action_inputs.key) || (null == action_inputs ? void 0 : action_inputs.hotkey); const keys = getHotkeys(keyStr); if (keys.length > 0) await keyboard.pressKey(...keys); break; } case 'release': { const keyStr = (null == action_inputs ? void 0 : action_inputs.key) || (null == action_inputs ? void 0 : action_inputs.hotkey); const keys = getHotkeys(keyStr); if (keys.length > 0) await keyboard.releaseKey(...keys); break; } case 'scroll': { const { direction } = action_inputs; if (null !== startX && null !== startY) await moveStraightTo(startX, startY); switch(null == direction ? void 0 : direction.toLowerCase()){ case 'up': await mouse.scrollUp(500); break; case 'down': await mouse.scrollDown(500); break; default: console.warn(`[NutjsOperator] Unsupported scroll direction: ${direction}`); } break; } case 'error_env': case 'call_user': case 'finished': case 'user_stop': return { status: StatusEnum.END }; default: logger.warn(`Unsupported action: ${action_type}`); } } } _define_property(NutJSOperator, "MANUAL", { ACTION_SPACES: [ "click(start_box='[x1, y1, x2, y2]')", "left_double(start_box='[x1, y1, x2, y2]')", "right_single(start_box='[x1, y1, x2, y2]')", "drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')", "hotkey(key='')", "type(content='') #If you want to submit your input, use \"\\n\" at the end of `content`.", "scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')", "wait() #Sleep for 5s and take a screenshot to check for any changes.", "finished()", "call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help." ] }); export { NutJSOperator }; //# sourceMappingURL=index.mjs.map