UNPKG

@gui-agent/operator-aio

Version:
307 lines (306 loc) 14.2 kB
/**
 * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 * SPDX-License-Identifier: Apache-2.0
 */
"use strict";

// ---------------------------------------------------------------------------
// Bundler runtime helpers (generated code, kept functionally as emitted).
// ---------------------------------------------------------------------------
var __webpack_require__ = {};
(() => {
  // Installs enumerable, getter-backed export properties on `exports1` for
  // every own key of `definition` that `exports1` does not already own.
  __webpack_require__.d = (exports1, definition) => {
    for (var key in definition) {
      if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) {
        Object.defineProperty(exports1, key, { enumerable: true, get: definition[key] });
      }
    }
  };
})();
(() => {
  // Own-property check that is safe for objects shadowing `hasOwnProperty`.
  __webpack_require__.o = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop);
})();
(() => {
  // Tags an exports object as an ES module namespace.
  __webpack_require__.r = (exports1) => {
    if ('undefined' != typeof Symbol && Symbol.toStringTag) {
      Object.defineProperty(exports1, Symbol.toStringTag, { value: 'Module' });
    }
    Object.defineProperty(exports1, '__esModule', { value: true });
  };
})();

var __webpack_exports__ = {};
__webpack_require__.r(__webpack_exports__);
// The getter is lazy, so referencing the class before its declaration is safe:
// it is only read by the export loop at the bottom of the file.
__webpack_require__.d(__webpack_exports__, {
  AIOHybridOperator: () => AIOHybridOperator
});

const core_namespaceObject = require("@ui-tars/sdk/core");
const logger_namespaceObject = require("@agent-infra/logger");
const media_utils_namespaceObject = require("@agent-infra/media-utils");
const utils_namespaceObject = require("@ui-tars/shared/utils");
const external_utils_js_namespaceObject = require("./utils.js");
const external_AIOComputer_js_namespaceObject = require("./AIOComputer.js");
const external_AIOBrowser_js_namespaceObject = require("./AIOBrowser.js");

/**
 * Down-levelled class-field initializer: defines `key` on `obj` as an
 * enumerable, configurable, writable data property when the key already
 * exists (own or inherited), otherwise performs a plain assignment.
 *
 * @param {object} obj - Target object.
 * @param {string} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`.
 */
function _define_property(obj, key, value) {
  if (key in obj) {
    Object.defineProperty(obj, key, {
      value: value,
      enumerable: true,
      configurable: true,
      writable: true
    });
  } else {
    obj[key] = value;
  }
  return obj;
}

/**
 * Scroll deltas handed to `aioComputer.scroll(dx, dy)` for each supported
 * (lower-cased) direction keyword.
 */
const SCROLL_DELTAS = Object.freeze({
  up: Object.freeze({ dx: 0, dy: 10 }),
  down: Object.freeze({ dx: 0, dy: -10 }),
  left: Object.freeze({ dx: 10, dy: 0 }),
  right: Object.freeze({ dx: -10, dy: 0 })
});

const logger = new logger_namespaceObject.ConsoleLogger('AioHybridOperator');

/**
 * Operator that drives an `AIOComputer` (pointer / keyboard / screenshot) and
 * an `AIOBrowser` (navigation / active URL) behind the `Operator` interface.
 *
 * Instances are normally obtained through {@link AIOHybridOperator.create},
 * which also launches the browser.
 */
class AIOHybridOperator extends core_namespaceObject.Operator {
  /**
   * Builds an operator, runs {@link AIOHybridOperator#initialize} on it and
   * records it as `currentInstance` on the class the method was invoked on.
   *
   * @param {object} options - Forwarded to the constructor and `initialize`;
   *   `options.baseURL` is logged and passed on to the browser.
   * @returns {Promise<AIOHybridOperator>} The initialized instance.
   */
  static async create(options) {
    logger.info('[AioHybridOperator] construct:', options.baseURL);
    const instance = new AIOHybridOperator(options);
    await instance.initialize(options);
    this.currentInstance = instance;
    return instance;
  }

  /**
   * Kicks off an initial screenshot and creates + launches the browser.
   *
   * @param {object} options - Must carry `baseURL`.
   * @returns {Promise<void>}
   */
  async initialize(options) {
    // The initial screenshot is intentionally not awaited (it does not block
    // browser start-up), but a rejection must not surface as an unhandled
    // promise rejection, so it is observed and logged here.
    // NOTE(review): the meaning of the `0` argument is defined by AIOComputer
    // and is not visible from this file.
    Promise.resolve(this.aioComputer.screenshot(0)).catch((error) => {
      logger.warn('[AioHybridOperator] Initial screenshot failed:', error);
    });

    this.aioBrowser = await external_AIOBrowser_js_namespaceObject.AIOBrowser.create({
      // NOTE(review): the key is spelled `baseURl` (lower-case "l"); kept as-is
      // because it is part of AIOBrowser.create's contract - confirm there.
      baseURl: options.baseURL,
      logger: logger
    });
    if (this.aioBrowser != null) {
      await this.aioBrowser.launch({
        timeout: 1000,
        defaultViewport: {
          width: 1280,
          height: 1024
        }
      });
    }
    logger.info('[AioHybridOperator] AIOBrowser launched successfully');
    logger.info('[AioHybridOperator] AIOBrowser initialized successfully');
  }

  /**
   * Reports metadata about the current page.
   *
   * @returns {Promise<{url: string}>} The browser's active URL, or an empty
   *   string when there is no browser, no URL, or the lookup failed (the
   *   failure is logged, not thrown).
   */
  async getMeta() {
    let url = '';
    try {
      const retUrl = this.aioBrowser != null ? await this.aioBrowser.getActiveUrl() : void 0;
      if (retUrl) url = retUrl;
    } catch (error) {
      logger.error('Failed to get page meta:', error);
    }
    return { url };
  }

  /**
   * Captures a screenshot through the computer API and remembers its pixel
   * dimensions, which `execute` later uses to map box coordinates.
   *
   * @returns {Promise<{base64: string, scaleFactor: number}>} Image data and
   *   scale factor (defaults to 1 when the API reports none / a falsy value).
   * @throws {Error} When the API reports failure or returns no base64 data;
   *   the error is logged and rethrown.
   */
  async screenshot() {
    logger.info('[AioHybridOperator] Taking screenshot');
    try {
      const result = await this.aioComputer.screenshot();
      if (!result.success) throw new Error(result.message || 'Screenshot failed');

      const data = result.data;
      if (data == null || !data.base64) {
        throw new Error('No base64 image data received from screenshot API');
      }

      const base64Tool = new media_utils_namespaceObject.Base64ImageParser(data.base64);
      const dimensions = base64Tool.getDimensions();
      if (dimensions) {
        this.screenshotWidth = dimensions.width;
        this.screenshotHeight = dimensions.height;
      }
      logger.info('[AioHybridOperator] screenshot dimensions:', JSON.stringify(dimensions));
      return {
        base64: data.base64,
        scaleFactor: data.scaleFactor || 1
      };
    } catch (error) {
      logger.error('[AioHybridOperator] Screenshot failed:', error);
      throw error;
    }
  }

  /**
   * Executes one parsed model prediction.
   *
   * Box strings are converted to screen coordinates against the dimensions of
   * the most recent screenshot (`screenshotWidth` / `screenshotHeight`) with
   * factors `[1000, 1000]`.
   *
   * @param {object} params - Execution parameters.
   * @param {object} params.parsedPrediction - Carries `action_type` and
   *   `action_inputs` (`start_box`, `end_box`, `content`, `key` / `hotkey`,
   *   `direction`, depending on the action).
   * @returns {Promise<object>} On success
   *   `{ startX, startY, startXPercent, startYPercent, action_inputs }`, where
   *   the percent fields are only populated by single-point pointer actions.
   *   If any step throws, the error is logged and
   *   `{ status: StatusEnum.ERROR }` is returned instead.
   */
  async execute(params) {
    const { action_type, action_inputs } = params.parsedPrediction;
    // Normalized view used for reads only, so every branch is null-safe; the
    // caller's original `action_inputs` is what gets logged and returned.
    const inputs = action_inputs == null ? {} : action_inputs;
    const startBoxStr = inputs.start_box || '';

    logger.info(
      '[AioHybridOperator] Executing action',
      action_type,
      action_inputs,
      ', screen context',
      this.screenshotWidth,
      this.screenshotHeight
    );

    // Single conversion path so that start and end boxes always share the same
    // coordinate space.
    const parseBox = (boxStr) => {
      const { parseBoxToScreenCoords } = external_utils_js_namespaceObject;
      return parseBoxToScreenCoords({
        boxStr,
        screenWidth: this.screenshotWidth,
        screenHeight: this.screenshotHeight,
        factors: [1000, 1000]
      });
    };
    const roundOrNull = (value) => (value !== null ? Math.round(value) : null);

    const startBox = parseBox(startBoxStr);
    const rawPercentX = startBox.percentX;
    const rawPercentY = startBox.percentY;
    const startX = roundOrNull(startBox.x);
    const startY = roundOrNull(startBox.y);
    logger.info(`[AioHybridOperator] Action position: (${startX}, ${startY})`);
    logger.info(`[AioHybridOperator] Action position percent raw: (${rawPercentX}, ${rawPercentY})`);

    let startXPercent = null;
    let startYPercent = null;

    // Runs a single-point pointer action when a start position is available
    // and records the percent position that gets reported back to the caller.
    const atStartPoint = async (perform) => {
      if (startX === null || startY === null) return;
      await perform(startX, startY);
      startXPercent = rawPercentX;
      startYPercent = rawPercentY;
    };

    try {
      switch (action_type) {
        case 'navigate':
          logger.info('[AioHybridOperator] Navigating to', inputs.content);
          if (this.aioBrowser != null) {
            await this.aioBrowser.handleNavigate({ url: inputs.content || '' });
          }
          break;

        case 'navigate_back':
          logger.info('[AioHybridOperator] Navigating back');
          if (this.aioBrowser != null) {
            await this.aioBrowser.handleNavigateBack();
          }
          break;

        case 'wait': {
          logger.info('[AioHybridOperator] Waiting for 5 seconds');
          const { sleep } = utils_namespaceObject;
          await sleep(5000);
          break;
        }

        case 'mouse_move':
        case 'hover':
          await atStartPoint((x, y) => this.aioComputer.moveTo(x, y));
          break;

        case 'click':
        case 'left_click':
        case 'left_single':
          await atStartPoint((x, y) => this.aioComputer.click(x, y));
          break;

        case 'left_double':
        case 'double_click':
          await atStartPoint((x, y) => this.aioComputer.doubleClick(x, y));
          break;

        case 'right_click':
        case 'right_single':
          await atStartPoint((x, y) => this.aioComputer.rightClick(x, y));
          break;

        case 'middle_click':
          await atStartPoint((x, y) => this.aioComputer.click(x, y, 'middle'));
          break;

        case 'left_click_drag':
        case 'drag':
        case 'select':
          if (inputs.end_box) {
            const endBox = parseBox(inputs.end_box);
            const endX = roundOrNull(endBox.x);
            const endY = roundOrNull(endBox.y);
            // Explicit null checks: 0 is a valid coordinate (screen edge) and
            // must not cause the drag to be skipped.
            if (startX !== null && startY !== null && endX !== null && endY !== null) {
              await this.aioComputer.moveTo(startX, startY);
              await this.aioComputer.mouseDown();
              await this.aioComputer.dragTo(endX, endY);
              await this.aioComputer.mouseUp();
            }
          }
          break;

        case 'type': {
          const content = inputs.content == null ? void 0 : inputs.content.trim();
          if (content) {
            // Drop one trailing newline marker, written either as the two
            // characters backslash + "n" or as a real line feed.
            const stripContent = content.replace(/\\n$/, '').replace(/\n$/, '');
            await this.aioComputer.type(stripContent);
          }
          break;
        }

        case 'hotkey':
        case 'press': {
          const keyStr = inputs.key || inputs.hotkey;
          if (keyStr) {
            // Keys may be separated by whitespace and/or "+".
            const keys = keyStr.split(/[\s+]/).filter((k) => k.length > 0);
            if (keys.length > 1) {
              await this.aioComputer.hotkey(keys);
            } else {
              // Use the split key so stray separators (e.g. " enter") are not
              // sent along; fall back to the raw string when splitting left
              // nothing (e.g. the key is "+" or a space).
              await this.aioComputer.press(keys.length === 1 ? keys[0] : keyStr);
            }
          }
          break;
        }

        case 'scroll': {
          const direction = inputs.direction;
          if (startX !== null && startY !== null && direction) {
            const normalizedDirection = direction.toLowerCase();
            // Own-property lookup so names such as "constructor" never match.
            const delta = Object.prototype.hasOwnProperty.call(SCROLL_DELTAS, normalizedDirection)
              ? SCROLL_DELTAS[normalizedDirection]
              : null;
            if (delta) await this.aioComputer.scroll(delta.dx, delta.dy);
          }
          break;
        }

        // Control actions that need no device interaction here.
        case 'error_env':
        case 'call_user':
        case 'finished':
        case 'user_stop':
          break;

        default:
          logger.warn(`Unsupported action type: ${action_type}`);
      }

      logger.info(`[AioHybridOperator] position percent return: (${startXPercent}, ${startYPercent})`);
      return {
        startX,
        startY,
        startXPercent,
        startYPercent,
        action_inputs
      };
    } catch (error) {
      // The logged message is Chinese for "execution failed".
      logger.error("[AioHybridOperator] \u6267\u884C\u5931\u8D25:", error);
      return {
        status: core_namespaceObject.StatusEnum.ERROR
      };
    }
  }

  /**
   * @param {object} options - Passed through unchanged to `AIOComputer`.
   */
  constructor(options) {
    super();
    // Browser handle; stays null until `initialize` has run.
    _define_property(this, "aioBrowser", null);
    _define_property(this, "aioComputer", void 0);
    // Fallback dimensions until the first successful `screenshot()`; they
    // match the default viewport requested in `initialize`.
    _define_property(this, "screenshotWidth", 1280);
    _define_property(this, "screenshotHeight", 1024);
    this.aioComputer = new external_AIOComputer_js_namespaceObject.AIOComputer(options);
  }
}

// Static: textual description of the action space exposed by this operator.
_define_property(AIOHybridOperator, "MANUAL", {
  ACTION_SPACES: [
    "click(start_box='[x1, y1, x2, y2]')",
    "left_double(start_box='[x1, y1, x2, y2]')",
    "right_single(start_box='[x1, y1, x2, y2]')",
    "drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')",
    "hotkey(key='')",
    "type(content='') #If you want to submit your input, use \"\\n\" at the end of `content`.",
    "scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')",
    "wait() #Sleep for 5s and take a screenshot to check for any changes.",
    "finished()",
    "call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help."
  ]
});
// Static: most recent instance produced by `create`.
_define_property(AIOHybridOperator, "currentInstance", null);

exports.AIOHybridOperator = __webpack_exports__.AIOHybridOperator;
for (var __webpack_i__ in __webpack_exports__) {
  if (-1 === ["AIOHybridOperator"].indexOf(__webpack_i__)) {
    exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
  }
}
Object.defineProperty(exports, '__esModule', { value: true });
//# sourceMappingURL=AIOHybridOperator.js.map