UNPKG

@ui-tars/operator-nut-js

Version:
284 lines (283 loc) 14.1 kB
/**
 * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 * SPDX-License-Identifier: Apache-2.0
 */
"use strict";

// ---------------------------------------------------------------------------
// Bundler runtime helpers (generated code, kept functionally unchanged).
// ---------------------------------------------------------------------------
var __webpack_require__ = {};

(() => {
  // n(module): returns a getter for `module.default` when the module is
  // flagged `__esModule`, otherwise a getter for the module itself. The getter
  // also exposes itself under the property `a`.
  __webpack_require__.n = (module) => {
    var getter = module && module.__esModule ? () => module['default'] : () => module;
    __webpack_require__.d(getter, { a: getter });
    return getter;
  };
})();

(() => {
  // d(exports, definition): defines enumerable getter properties on `exports`
  // for every own key of `definition` that `exports` does not already own.
  __webpack_require__.d = (exports1, definition) => {
    for (var key in definition) {
      if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) {
        Object.defineProperty(exports1, key, { enumerable: true, get: definition[key] });
      }
    }
  };
})();

(() => {
  // o(obj, prop): own-property check that does not rely on obj's prototype.
  __webpack_require__.o = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop);
})();

(() => {
  // r(exports): tags the exports object as an ES module.
  __webpack_require__.r = (exports1) => {
    if ('undefined' != typeof Symbol && Symbol.toStringTag) {
      Object.defineProperty(exports1, Symbol.toStringTag, { value: 'Module' });
    }
    Object.defineProperty(exports1, '__esModule', { value: true });
  };
})();

var __webpack_exports__ = {};
__webpack_require__.r(__webpack_exports__);
__webpack_require__.d(__webpack_exports__, {
  NutJSOperator: () => NutJSOperator,
});

const core_namespaceObject = require("@ui-tars/sdk/core");
const external_jimp_namespaceObject = require("jimp");
const nut_js_namespaceObject = require("@computer-use/nut-js");
const external_big_js_namespaceObject = require("big.js");
// Retained from the generated bundle; no longer referenced by the operator
// code below, but the dependency line is kept so the module's load-time
// behavior is unchanged.
var external_big_js_default = /*#__PURE__*/ __webpack_require__.n(external_big_js_namespaceObject);

/**
 * Sets `obj[key] = value`. When `key` already exists on `obj` (own or
 * inherited) the property is redefined as an enumerable, configurable,
 * writable data property; otherwise plain assignment is used.
 *
 * @param {object} obj - Target object.
 * @param {string} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`.
 */
function _define_property(obj, key, value) {
  if (key in obj) {
    Object.defineProperty(obj, key, {
      value: value,
      enumerable: true,
      configurable: true,
      writable: true,
    });
  } else {
    obj[key] = value;
  }
  return obj;
}

/**
 * Moves the mouse in a straight line to the given point.
 * Does nothing when either coordinate is missing (null or undefined).
 *
 * @param {number|null|undefined} startX - Target x coordinate.
 * @param {number|null|undefined} startY - Target y coordinate.
 * @returns {Promise<void>}
 */
const moveStraightTo = async (startX, startY) => {
  // `== null` also rejects undefined, which the previous strict-null check
  // let through to the Point constructor.
  if (startX == null || startY == null) return;
  const { mouse, straightTo, Point } = nut_js_namespaceObject;
  await mouse.move(straightTo(new Point(startX, startY)));
};

/**
 * Operator that performs parsed model actions (mouse, keyboard, scrolling)
 * through the nut-js bindings and captures screenshots of the screen.
 */
class NutJSOperator extends core_namespaceObject.Operator {
  /**
   * Grabs the screen, scales the image down by the reported pixel density and
   * returns it as a base64-encoded PNG.
   *
   * @returns {Promise<{base64: string, scaleFactor: number}>} The encoded
   *   image and the horizontal pixel-density scale (`pixelDensity.scaleX`).
   */
  async screenshot() {
    const { useContext } = core_namespaceObject;
    const { logger } = useContext();
    const { screen } = nut_js_namespaceObject;

    const grabImage = await screen.grab();
    const screenWithScale = await grabImage.toRGB();
    const { scaleX, scaleY } = screenWithScale.pixelDensity;
    const scaleFactor = scaleX;
    logger.info('[NutjsOperator]', 'scaleX', scaleX, 'scaleY', scaleY);

    const screenWithScaleImage = await external_jimp_namespaceObject.Jimp.fromBitmap({
      width: screenWithScale.width,
      height: screenWithScale.height,
      data: Buffer.from(screenWithScale.data),
    });

    // Divide the captured size by the pixel density on each axis.
    const width = screenWithScale.width / scaleX;
    const height = screenWithScale.height / scaleY;
    const physicalScreenImage = await screenWithScaleImage
      .resize({ w: width, h: height })
      .getBuffer('image/png');

    const output = {
      base64: physicalScreenImage.toString('base64'),
      scaleFactor,
    };
    // `logger` is already dereferenced unguarded above, so no null guard here.
    logger.info(`[NutjsOperator] screenshot: ${width}x${height}, scaleFactor: ${scaleFactor}`);
    return output;
  }

  /**
   * Executes one parsed prediction.
   *
   * @param {object} params
   * @param {object} params.parsedPrediction - Carries `action_type` and
   *   `action_inputs` (`start_box`, `end_box`, `content`, `key`/`hotkey`,
   *   `direction`, depending on the action).
   * @param {number} params.screenWidth - Passed to `parseBoxToScreenCoords`.
   * @param {number} params.screenHeight - Passed to `parseBoxToScreenCoords`.
   * @param {number} params.scaleFactor - Only logged here.
   * @returns {Promise<{status: *}|undefined>} `{ status: StatusEnum.END }` for
   *   `error_env`, `call_user`, `finished` and `user_stop`; otherwise nothing.
   */
  async execute(params) {
    const { useContext, parseBoxToScreenCoords, StatusEnum } = core_namespaceObject;
    const { mouse, keyboard, clipboard, sleep, straightTo, Point, Key, Button } =
      nut_js_namespaceObject;
    const { logger } = useContext();
    const { parsedPrediction, screenWidth, screenHeight, scaleFactor } = params;
    const { action_type, action_inputs } = parsedPrediction;
    const startBoxStr = action_inputs?.start_box || '';

    logger.info('[NutjsOperator] execute', scaleFactor);
    const { x: startX, y: startY } = parseBoxToScreenCoords({
      boxStr: startBoxStr,
      screenWidth,
      screenHeight,
    });
    logger.info(`[NutjsOperator Position]: (${startX}, ${startY})`);
    mouse.config.mouseSpeed = 3600;

    /**
     * Translates a hotkey string such as "ctrl+shift c" into nut-js key codes.
     * Tokens are separated by whitespace or "+", matched case-insensitively
     * against the alias table first and the `Key` member names second.
     *
     * @param {string|undefined} keyStr - Raw key description.
     * @returns {Array} Resolved key codes; empty when `keyStr` is empty.
     */
    const getHotkeys = (keyStr) => {
      if (!keyStr) {
        logger.error('[NutjsOperator] hotkey error: ', `${keyStr} is not a valid key`);
        return [];
      }

      const isMac = 'darwin' === process.platform;
      const platformCommandKey = isMac ? Key.LeftCmd : Key.LeftWin;
      // On macOS "ctrl" is mapped to the command key.
      const platformCtrlKey = isMac ? Key.LeftCmd : Key.LeftControl;

      // Maps (instead of plain objects) keep inherited names such as
      // "constructor" from resolving to a bogus value.
      const aliasKeys = new Map(
        Object.entries({
          return: Key.Enter,
          ctrl: platformCtrlKey,
          shift: Key.LeftShift,
          alt: Key.LeftAlt,
          'page down': Key.PageDown,
          'page up': Key.PageUp,
          meta: platformCommandKey,
          win: platformCommandKey,
          command: platformCommandKey,
          cmd: platformCommandKey,
          ',': Key.Comma,
          arrowup: Key.Up,
          arrowdown: Key.Down,
          arrowleft: Key.Left,
          arrowright: Key.Right,
        }),
      );

      // NOTE(review): this assumes `Key` is a compiled TypeScript numeric
      // enum, in which case Object.entries also yields reverse
      // (number -> name) entries. Those are skipped so a token like "1" can
      // no longer resolve to a member *name* string instead of a key code.
      const namedKeys = new Map(
        Object.entries(Key)
          .filter(([name]) => Number.isNaN(Number(name)))
          .map(([name, code]) => [name.toLowerCase(), code]),
      );

      const resolveKey = (token) => {
        const alias = aliasKeys.get(token);
        if (alias != null) return alias;
        // Single digits map to the NumN members when they exist.
        // NOTE(review): presumes Key.Num0..Key.Num9 — confirm against nut-js.
        if (/^\d$/.test(token)) return Key[`Num${token}`];
        return namedKeys.get(token);
      };

      const keys = [];
      for (const rawToken of keyStr.split(/[\s+]/)) {
        const token = rawToken.toLowerCase();
        const resolved = resolveKey(token);
        // Nullish check rather than truthiness: a key code of 0 is valid and
        // was previously dropped by `.filter(Boolean)`.
        if (resolved != null) {
          keys.push(resolved);
        } else if (token) {
          logger.warn(`[NutjsOperator] hotkey: ignoring unknown key "${token}"`);
        }
      }

      logger.info('[NutjsOperator] hotkey: ', keys);
      return keys;
    };

    // Shared by the hotkey / press / release actions.
    const readHotkeys = () => getHotkeys(action_inputs?.key || action_inputs?.hotkey);

    switch (action_type) {
      case 'wait':
        logger.info('[NutjsOperator] wait', action_inputs);
        await sleep(5000);
        break;

      case 'mouse_move':
      case 'hover':
        logger.info('[NutjsOperator] mouse_move');
        await moveStraightTo(startX, startY);
        break;

      case 'click':
      case 'left_click':
      case 'left_single':
        logger.info('[NutjsOperator] left_click');
        await moveStraightTo(startX, startY);
        await sleep(100);
        await mouse.click(Button.LEFT);
        break;

      case 'left_double':
      case 'double_click':
        logger.info(`[NutjsOperator] ${action_type}(${startX}, ${startY})`);
        await moveStraightTo(startX, startY);
        await sleep(100);
        await mouse.doubleClick(Button.LEFT);
        break;

      case 'right_click':
      case 'right_single':
        logger.info('[NutjsOperator] right_click');
        await moveStraightTo(startX, startY);
        await sleep(100);
        await mouse.click(Button.RIGHT);
        break;

      case 'middle_click':
        logger.info('[NutjsOperator] middle_click');
        await moveStraightTo(startX, startY);
        await mouse.click(Button.MIDDLE);
        break;

      case 'left_click_drag':
      case 'drag':
      case 'select': {
        logger.info('[NutjsOperator] drag', action_inputs);
        if (!action_inputs?.end_box) break;

        const { x: endX, y: endY } = parseBoxToScreenCoords({
          boxStr: action_inputs.end_box,
          screenWidth,
          screenHeight,
        });
        // Nullish checks: a coordinate of 0 is a valid screen position and
        // must not cancel the drag (the old truthiness test did).
        if (startX == null || startY == null || endX == null || endY == null) break;

        // Start the gesture at the start box and finish at the end box. The
        // previous code never moved to the start point and targeted the centre
        // of the start/end region, i.e. the midpoint of the requested drag.
        await moveStraightTo(startX, startY);
        await sleep(100);
        await mouse.drag(straightTo(new Point(endX, endY)));
        break;
      }

      case 'type': {
        const rawContent = action_inputs?.content ?? '';
        const content = rawContent.trim();
        logger.info('[NutjsOperator] type', content);

        // Decide on "submit" from the untrimmed text: trim() removes a real
        // trailing newline, so checking afterwards could only ever see the
        // literal backslash-n form.
        const shouldSubmit = /(?:\\n|\n)\s*$/.test(rawContent);
        const stripContent = content.replace(/\\n$/, '').replace(/\n$/, '');
        if (!stripContent && !shouldSubmit) break;

        keyboard.config.autoDelayMs = 0;
        try {
          if (stripContent) {
            if ('win32' === process.platform) {
              // On Windows the text is pasted via the clipboard; the previous
              // clipboard content is restored afterwards, even on failure.
              const originalClipboard = await clipboard.getContent();
              try {
                await clipboard.setContent(stripContent);
                try {
                  await keyboard.pressKey(Key.LeftControl, Key.V);
                  await sleep(50);
                } finally {
                  await keyboard.releaseKey(Key.LeftControl, Key.V);
                }
                await sleep(50);
              } finally {
                await clipboard.setContent(originalClipboard);
              }
            } else {
              await keyboard.type(stripContent);
            }
          }

          if (shouldSubmit) {
            await keyboard.pressKey(Key.Enter);
            await keyboard.releaseKey(Key.Enter);
          }
        } finally {
          // Same value the original code reset to after typing.
          keyboard.config.autoDelayMs = 500;
        }
        break;
      }

      case 'hotkey': {
        const keys = readHotkeys();
        if (keys.length > 0) {
          try {
            await keyboard.pressKey(...keys);
          } finally {
            // Always attempt the release so a failed press cannot leave
            // modifier keys held down.
            await keyboard.releaseKey(...keys);
          }
        }
        break;
      }

      case 'press': {
        const keys = readHotkeys();
        if (keys.length > 0) await keyboard.pressKey(...keys);
        break;
      }

      case 'release': {
        const keys = readHotkeys();
        if (keys.length > 0) await keyboard.releaseKey(...keys);
        break;
      }

      case 'scroll': {
        const direction = action_inputs?.direction;
        // moveStraightTo is a no-op when the start box yielded no coordinates.
        await moveStraightTo(startX, startY);

        const scrollAmount = 500;
        switch (direction?.toLowerCase()) {
          case 'up':
            await mouse.scrollUp(scrollAmount);
            break;
          case 'down':
            await mouse.scrollDown(scrollAmount);
            break;
          // MANUAL advertises left/right, so they are handled as well.
          case 'left':
            await mouse.scrollLeft(scrollAmount);
            break;
          case 'right':
            await mouse.scrollRight(scrollAmount);
            break;
          default:
            logger.warn(`[NutjsOperator] Unsupported scroll direction: ${direction}`);
        }
        break;
      }

      case 'error_env':
      case 'call_user':
      case 'finished':
      case 'user_stop':
        return { status: StatusEnum.END };

      default:
        logger.warn(`Unsupported action: ${action_type}`);
    }
  }
}

// Static description of the action space this operator understands.
_define_property(NutJSOperator, "MANUAL", {
  ACTION_SPACES: [
    "click(start_box='[x1, y1, x2, y2]')",
    "left_double(start_box='[x1, y1, x2, y2]')",
    "right_single(start_box='[x1, y1, x2, y2]')",
    "drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')",
    "hotkey(key='')",
    "type(content='') #If you want to submit your input, use \"\\n\" at the end of `content`.",
    "scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')",
    "wait() #Sleep for 5s and take a screenshot to check for any changes.",
    "finished()",
    "call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.",
  ],
});

exports.NutJSOperator = __webpack_exports__.NutJSOperator;
for (var __webpack_i__ in __webpack_exports__) {
  if (-1 === ["NutJSOperator"].indexOf(__webpack_i__)) {
    exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
  }
}
Object.defineProperty(exports, '__esModule', { value: true });
//# sourceMappingURL=index.js.map