UNPKG

@gui-agent/operator-browser

Version:
572 lines (571 loc) 24.2 kB
/**
 * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 * SPDX-License-Identifier: Apache-2.0
 */
"use strict";

// ---------------------------------------------------------------------------
// Bundler runtime shim (generated code, kept functionally unchanged).
// ---------------------------------------------------------------------------
var __webpack_require__ = {};
(() => {
    // Define lazy, enumerable getters on `exports1` for every own key of
    // `definition` that `exports1` does not already own.
    __webpack_require__.d = (exports1, definition) => {
        for (var key in definition) {
            if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) {
                Object.defineProperty(exports1, key, {
                    enumerable: true,
                    get: definition[key]
                });
            }
        }
    };
})();
(() => {
    // Own-property check that is safe for objects without a prototype.
    __webpack_require__.o = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop);
})();
(() => {
    // Tag an exports object as an ES module namespace.
    __webpack_require__.r = (exports1) => {
        if ('undefined' != typeof Symbol && Symbol.toStringTag) {
            Object.defineProperty(exports1, Symbol.toStringTag, {
                value: 'Module'
            });
        }
        Object.defineProperty(exports1, '__esModule', {
            value: true
        });
    };
})();
var __webpack_exports__ = {};
__webpack_require__.r(__webpack_exports__);
__webpack_require__.d(__webpack_exports__, {
    BrowserOperator: () => BrowserOperator
});

const logger_namespaceObject = require("@agent-infra/logger");
const puppeteer_enhance_namespaceObject = require("@agent-infra/puppeteer-enhance");
const base_namespaceObject = require("@gui-agent/shared/base");
const utils_namespaceObject = require("@gui-agent/shared/utils");
const external_ui_helper_js_namespaceObject = require("./ui-helper.js");

/** How long a single direct `document.visibilityState` probe may take. */
const VISIBILITY_CHECK_TIMEOUT_MS = 3000;
/** How long to wait for a navigation to start after pressing Enter. */
const NAVIGATION_WAIT_TIMEOUT_MS = 5000;

/**
 * Pause for `ms` milliseconds.
 * Looks the helper up on the namespace object at call time and invokes it
 * without a `this` binding, exactly like the generated `(0, ns.sleep)(ms)` form.
 */
const sleep = (ms) => (0, utils_namespaceObject.sleep)(ms);

/**
 * Class-field initialisation helper emitted by the compiler: defines `key` as a
 * plain writable/enumerable/configurable data property (or assigns it when the
 * key does not exist on the object or its prototype chain yet).
 */
function _define_property(obj, key, value) {
    if (key in obj) {
        Object.defineProperty(obj, key, {
            value: value,
            enumerable: true,
            configurable: true,
            writable: true
        });
    } else {
        obj[key] = value;
    }
    return obj;
}

/**
 * Operator that drives a browser page: takes screenshots and executes GUI
 * actions (click, type, scroll, drag, navigation, hotkeys) through the page's
 * `mouse` / `keyboard` objects.
 *
 * NOTE(review): `this.getScreenContext()` is called below but is not defined in
 * this file; it is presumably provided by the `Operator` base class - confirm.
 */
class BrowserOperator extends base_namespaceObject.Operator {
    /**
     * @param {object} options
     * @param {object} options.browser - Browser wrapper; this class calls
     *   `getBrowser()`, `getActivePage()` and `close()` on it.
     * @param {object} [options.logger] - Logger exposing `spawn()`; falls back to
     *   the default logger of `@agent-infra/logger`.
     * @param {boolean} [options.showActionInfo] - Pass `false` to disable the action overlay.
     * @param {boolean} [options.showWaterFlow] - Pass `false` to disable the water-flow effect.
     * @param {boolean} [options.highlightClickableElements] - Pass `false` to disable highlighting.
     */
    constructor(options) {
        super();
        const fields = [
            "options",
            "logger",
            "browser",
            "uiHelper",
            "showActionInfo",
            "showWaterFlowEffect",
            "highlightClickableElements",
            "deviceScaleFactor",
            "currentScreenContext",
            "currentPage",
            "hotkeyExecutor"
        ];
        for (const field of fields) _define_property(this, field, void 0);

        this.options = options;
        // Every visual aid is on unless the caller passes an explicit `false`.
        this.showActionInfo = true;
        this.showWaterFlowEffect = true;
        this.highlightClickableElements = true;
        this.currentPage = null;
        this.browser = this.options.browser;
        this.logger = (this.options.logger ?? logger_namespaceObject.defaultLogger).spawn('[BrowserOperator]');
        this.uiHelper = new external_ui_helper_js_namespaceObject.UIHelper(() => this.getActivePage(), this.logger);
        if (false === options.showActionInfo) this.showActionInfo = false;
        if (false === options.showWaterFlow) this.showWaterFlowEffect = false;
        if (false === options.highlightClickableElements) this.highlightClickableElements = false;
    }

    /** Enable or disable the per-action info overlay. */
    setShowActionInfo(enable) {
        this.showActionInfo = enable;
        this.logger.info(`Show Action info ${enable ? 'enabled' : 'disabled'}`);
    }

    /** Enable or disable the water-flow effect shown around screenshots. */
    setShowWaterFlow(enable) {
        this.showWaterFlowEffect = enable;
        this.logger.info(`Water flow effect ${enable ? 'enabled' : 'disabled'}`);
    }

    /** Enable or disable highlighting of clickable elements before screenshots. */
    setHighlightClickableElements(enable) {
        this.highlightClickableElements = enable;
        this.logger.info(`Clickable elements highlighting ${enable ? 'enabled' : 'disabled'}`);
    }

    /** Remove UI helper artefacts and close `currentPage` when one is set. */
    async cleanup() {
        this.logger.info('Starting cleanup...');
        await this.uiHelper.cleanup();
        if (this.currentPage) {
            await this.currentPage.close();
            this.currentPage = null;
            this.logger.info('Page closed successfully');
        }
        this.logger.info('Cleanup completed');
    }

    /** Run `cleanup()` and then close the browser. */
    async destroyInstance() {
        this.logger.debug('destroyInstance: start');
        try {
            await this.cleanup();
        } finally {
            // Close the browser even when cleanup failed, otherwise it is leaked.
            if (this.browser) await this.browser.close();
        }
    }

    /** Read viewport size and device scale factor and cache them as the screen context. */
    async initialize() {
        this.logger.info('initialize: getting screen context info...');
        const { width, height } = await this.getScreenRect();
        const scaleFactor = await this.getDeviceScaleFactor();
        this.currentScreenContext = {
            screenWidth: width,
            screenHeight: height,
            scaleX: scaleFactor ?? 1,
            scaleY: scaleFactor ?? 1
        };
    }

    /** @returns {string[]} Action type names advertised by this operator. */
    supportedActions() {
        return [
            'drag',
            'navigate',
            'navigate_back',
            'click',
            'double_click',
            'right_click',
            'type',
            'hotkey',
            'press',
            'release',
            'scroll',
            'wait',
            'finished',
            'call_user'
        ];
    }

    /**
     * @returns {object} The screen context cached by `initialize()`.
     * @throws {Error} When `initialize()` has not populated it yet.
     */
    screenContext() {
        if (this.currentScreenContext) return this.currentScreenContext;
        throw Error('Get screenContext failed.');
    }

    /**
     * Capture the visible viewport of the active page as a base64 JPEG.
     * @returns {Promise<{status: string, base64: string, url: string}>}
     * @throws Rethrows any error raised while capturing.
     */
    async screenshot() {
        this.logger.info('Starting screenshot...');
        if (this.showWaterFlowEffect) await this.uiHelper.showWaterFlow();
        const page = await this.getActivePage();
        try {
            if (this.highlightClickableElements) {
                this.logger.info('Highlighting clickable elements...');
                await this.uiHelper.highlightClickableElements();
                await sleep(300);
            }
            const startTime = Date.now();
            await this.uiHelper.cleanupTemporaryVisuals();
            const buffer = await page.screenshot({
                captureBeyondViewport: false,
                encoding: 'base64',
                type: 'jpeg',
                quality: 75,
                fullPage: false
            });
            const duration = Date.now() - startTime;
            this.logger.info(`Screenshot taken in ${duration}ms`);
            const output = {
                status: 'success',
                base64: buffer.toString(),
                url: (await this.getMeta()).url
            };
            // Keep the image payload out of the log.
            this.logger.info('Screenshot Info', {
                ...output,
                base64: '<base64>'
            });
            return output;
        } catch (error) {
            this.logger.error('Screenshot failed:', error);
            throw error;
        } finally {
            if (this.highlightClickableElements) await this.uiHelper.removeClickableHighlights();
            if (this.showWaterFlowEffect) await this.uiHelper.hideWaterFlow();
        }
    }

    /**
     * Execute a list of actions sequentially.
     * @param {{actions: object[], reasoningContent?: string}} params
     * @returns {Promise<{status: string}>}
     */
    async execute(params) {
        const { actions, reasoningContent } = params;
        for (const action of actions) {
            if (this.showActionInfo) {
                this.logger.info('Show action info');
                await this.uiHelper?.showActionInfo(action, reasoningContent ?? '');
            }
            this.logger.info('Execute action', action);
            await this.singleActionExecutor(action);
        }
        return {
            status: 'success'
        };
    }

    /**
     * Dispatch one action to its handler.
     * On failure the operator is cleaned up and the original error is rethrown.
     * @param {{type: string, inputs?: object}} action
     * @returns {Promise<{status: string}>}
     */
    async singleActionExecutor(action) {
        this.logger.info('Starting execute with action:', JSON.stringify(action));
        const { type: action_type, inputs: action_inputs } = action;
        this.logger.info(`Executing action: ${action_type}`);
        try {
            await this.getActivePage();
            switch (action_type) {
                case 'drag':
                    await this.handleDrag(action_inputs);
                    break;
                case 'navigate':
                    await this.handleNavigate(action_inputs);
                    break;
                case 'navigate_back':
                    await this.handleNavigateBack();
                    break;
                case 'click':
                case 'left_click':
                case 'left_single':
                    await this.handleClick(action_inputs);
                    break;
                case 'double_click':
                case 'left_double':
                    await this.handleDoubleClick(action_inputs);
                    break;
                case 'right_click':
                    await this.handleRightClick(action_inputs);
                    break;
                case 'type':
                    await this.handleType(action_inputs);
                    await sleep(1000);
                    break;
                case 'hotkey':
                    await this.handleHotkey(action_inputs);
                    break;
                case 'press':
                    await this.handlePress(action_inputs);
                    break;
                case 'release':
                    await this.handleRelease(action_inputs);
                    break;
                case 'scroll':
                    await this.handleScroll(action_inputs);
                    break;
                case 'wait': {
                    // `time` is in seconds; a missing, zero or non-numeric value
                    // falls back to 5s. `?.` keeps a `wait` without inputs from throwing.
                    const waitMs = (1000 * action_inputs?.time) || 5000;
                    await sleep(waitMs);
                    break;
                }
                case 'finished':
                case 'call_user':
                    // Awaited so a failing UI cleanup is reported through the
                    // catch below instead of surfacing as an unhandled rejection.
                    await this.uiHelper.cleanup();
                    break;
                default:
                    this.logger.warn(`Unsupported action: ${action_type}`);
            }
            this.logger.info(`Action ${action_type} completed successfully`);
        } catch (error) {
            this.logger.error(`Failed to execute ${action_type}:`, error);
            try {
                await this.cleanup();
            } catch (cleanupError) {
                // Never let a cleanup failure hide the error that caused it.
                this.logger.error('Cleanup after failed action also failed:', cleanupError);
            }
            throw error;
        }
        return {
            status: 'success'
        };
    }

    /**
     * Shared implementation of the three click handlers: show the indicator,
     * move the pointer, click, then give the page time to react.
     * @private
     * @param {{point?: object}} inputs
     * @param {string} actionName - 'click', 'double click' or 'right click'; log
     *   and error messages are derived from it.
     * @param {object} [clickOptions] - Options forwarded to `page.mouse.click`.
     * @throws {Error} When `inputs.point` is missing; rethrows click failures.
     */
    async performMouseClick(inputs, actionName, clickOptions = {}) {
        if (!inputs.point) throw new Error(`Missing point for ${actionName}.`);
        // 'click' -> 'Click'; also yields 'Clicking' / 'Double clicking' / 'Right clicking'.
        const title = actionName.charAt(0).toUpperCase() + actionName.slice(1);
        const { realX: x, realY: y } = await this.calculateRealCoords(inputs.point);
        this.logger.info(`${title}ing at (${x}, ${y})`);
        const page = await this.getActivePage();
        try {
            await this.uiHelper?.showClickIndicator(x, y);
            await sleep(300);
            await page.mouse.move(x, y);
            await sleep(100);
            await page.mouse.click(x, y, clickOptions);
            await sleep(800);
            this.logger.info(`${title} completed`);
        } catch (error) {
            this.logger.error(`${title} operation failed:`, error);
            throw error;
        }
    }

    /** Left-click at `inputs.point`. */
    async handleClick(inputs) {
        await this.performMouseClick(inputs, 'click');
    }

    /** Double-click at `inputs.point`. */
    async handleDoubleClick(inputs) {
        await this.performMouseClick(inputs, 'double click', {
            clickCount: 2
        });
    }

    /** Right-click at `inputs.point`. */
    async handleRightClick(inputs) {
        await this.performMouseClick(inputs, 'right click', {
            button: 'right'
        });
    }

    /**
     * Type `inputs.content` into the active page. When the content ends with a
     * newline (a real one or the two-character escape `\n`), Enter is pressed
     * afterwards and the method waits for a possible navigation.
     */
    async handleType(inputs) {
        const page = await this.getActivePage();
        const rawContent = inputs.content;
        const content = rawContent?.trim();
        if (!content) return void this.logger.warn('No content to type');
        this.logger.info('Typing content:', content);
        // `trim()` strips a real trailing newline, so the "press Enter" decision
        // has to look at the untrimmed input; the escaped form survives trimming.
        const shouldSubmit = content.endsWith('\\n') || /\n\s*$/.test(rawContent);
        const stripContent = content.replace(/\\n$/, '').replace(/\n$/, '');
        await page.keyboard.type(stripContent, {
            delay: 20 + 30 * Math.random()
        });
        if (shouldSubmit) {
            await sleep(50);
            this.logger.info('Pressing Enter after content');
            await page.keyboard.press('Enter');
            this.logger.info('Typing completed');
            await this.waitForPossibleNavigation(page);
        }
    }

    /**
     * Press a hotkey combination given as `inputs.key` or `inputs.hotkey`.
     * Execution failures are logged and not rethrown.
     * @throws {Error} When no key is given.
     */
    async handleHotkey(inputs) {
        const page = await this.getActivePage();
        const keyStr = inputs?.key || inputs?.hotkey;
        if (!keyStr) {
            this.logger.warn('No hotkey specified');
            throw new Error("No hotkey specified");
        }
        this.logger.info(`Executing hotkey: ${keyStr}`);
        try {
            await (await this.getHotkeyExecutor()).press(page, keyStr);
        } catch (error) {
            this.logger.error('Hotkey execution failed:', error);
        }
    }

    /**
     * Hold down `inputs.key`. Execution failures are logged and not rethrown.
     * @throws {Error} When no key is given.
     */
    async handlePress(inputs) {
        const page = await this.getActivePage();
        const keyStr = inputs?.key;
        if (!keyStr) {
            this.logger.warn('No key specified for press');
            throw new Error("No key specified for press");
        }
        this.logger.info(`Pressing key: ${keyStr}`);
        try {
            await (await this.getHotkeyExecutor()).down(page, keyStr);
        } catch (error) {
            this.logger.error('Press execution failed:', error);
        }
        this.logger.info('Press operation completed');
    }

    /**
     * Release `inputs.key`. Execution failures are logged and not rethrown.
     * @throws {Error} When no key is given.
     */
    async handleRelease(inputs) {
        const page = await this.getActivePage();
        const keyStr = inputs?.key;
        if (!keyStr) {
            this.logger.warn('No key specified for release');
            throw new Error("No key specified for release");
        }
        this.logger.info(`Releasing key: ${keyStr}`);
        try {
            await (await this.getHotkeyExecutor()).up(page, keyStr);
        } catch (error) {
            this.logger.error('Release execution failed:', error);
        }
        this.logger.info('Release operation completed');
    }

    /**
     * Scroll by 80% of the viewport (in CSS pixels) in `inputs.direction`
     * ('up' | 'down' | 'left' | 'right') with the pointer at `inputs.point`.
     * @throws {Error} When the direction or the point is missing.
     */
    async handleScroll(inputs) {
        // Validate up front: a missing direction used to surface as an opaque TypeError.
        if ('string' !== typeof inputs?.direction) throw new Error('No direction specified for scroll');
        if (!inputs.point) throw new Error("No point specified for scroll");
        const page = await this.getActivePage();
        const direction = inputs.direction.toLowerCase();
        const { realX: startX, realY: startY } = await this.calculateRealCoords(inputs.point);
        // 0 is a valid coordinate, so test for finite numbers rather than truthiness.
        if (Number.isFinite(startX) && Number.isFinite(startY)) {
            this.logger.info(`Moving mouse to scroll position: (${startX}, ${startY})`);
            await page.mouse.move(startX, startY);
            await sleep(100);
        }
        const { screenWidth, screenHeight, scaleX, scaleY } = await this.getScreenContext();
        const isVertical = 'up' === direction || 'down' === direction;
        const scrollAmount = isVertical ? screenHeight / scaleY * 0.8 : screenWidth / scaleX * 0.8;
        this.logger.info(`Scrolling ${direction} by ${scrollAmount}px`);
        switch (direction) {
            case 'up':
                await page.mouse.wheel({
                    deltaY: -scrollAmount
                });
                break;
            case 'down':
                await page.mouse.wheel({
                    deltaY: scrollAmount
                });
                break;
            case 'left':
                await page.mouse.wheel({
                    deltaX: -scrollAmount
                });
                break;
            case 'right':
                await page.mouse.wheel({
                    deltaX: scrollAmount
                });
                break;
            default:
                this.logger.warn(`Unsupported scroll direction: ${direction}`);
                return;
        }
        this.logger.info('Scroll completed');
    }

    /**
     * Navigate the active page to `inputs.url`; `https://` is prepended when the
     * URL does not start with `http://` or `https://`.
     * @throws {Error} When no URL is given.
     */
    async handleNavigate(inputs) {
        if (!inputs.url) throw new Error('No target url specified for navigation');
        let { url } = inputs;
        if (!/^https?:\/\//i.test(url)) url = 'https://' + url;
        this.logger.info(`Navigating to: ${url}`);
        const page = await this.getActivePage();
        await page.goto(url, {
            waitUntil: []
        });
        this.logger.info('Navigation completed');
    }

    /**
     * Drag from `inputs.start` to `inputs.end` in ten interpolated steps.
     * @throws {Error} When a point is missing or resolves to non-finite
     *   coordinates; rethrows failures of the drag itself.
     */
    async handleDrag(inputs) {
        if (!inputs.start || !inputs.end) throw new Error('Missing start_point or end_point for drag operation');
        const { realX: startX, realY: startY } = await this.calculateRealCoords(inputs.start);
        const { realX: endX, realY: endY } = await this.calculateRealCoords(inputs.end);
        // 0 is a valid coordinate (top/left edge); only reject missing or NaN values.
        if (![startX, startY, endX, endY].every(Number.isFinite)) throw new Error('Invalid coordinates for drag operation');
        let page;
        let isButtonHeld = false;
        try {
            page = await this.getActivePage();
            await this.uiHelper?.showDragIndicator(startX, startY, endX, endY);
            await sleep(300);
            await page.mouse.move(startX, startY);
            await sleep(100);
            await page.mouse.down();
            isButtonHeld = true;
            const steps = 10;
            for (let i = 1; i <= steps; i++) {
                const stepX = startX + (endX - startX) * i / steps;
                const stepY = startY + (endY - startY) * i / steps;
                await page.mouse.move(stepX, stepY);
                await sleep(30);
            }
            await sleep(100);
            await page.mouse.up();
            isButtonHeld = false;
            await sleep(800);
            this.logger.info('Drag completed');
        } catch (error) {
            this.logger.error('Drag operation failed:', error);
            if (isButtonHeld) {
                // Best effort: do not leave the mouse button pressed after a failed drag.
                try {
                    await page.mouse.up();
                } catch (releaseError) {
                    this.logger.warn('Failed to release mouse button after drag failure:', releaseError);
                }
            }
            throw error;
        }
    }

    /** Go back one entry in the active page's history. */
    async handleNavigateBack() {
        const page = await this.getActivePage();
        this.logger.info("handleNavigateBack");
        await page.goBack();
        this.logger.info('handleNavigateBack completed');
    }

    /**
     * Resolve as soon as `page` emits `framenavigated`, or after
     * NAVIGATION_WAIT_TIMEOUT_MS, whichever happens first.
     */
    async waitForPossibleNavigation(page) {
        await new Promise((resolve) => {
            let timer;
            const onStarted = () => {
                // Cancel the fallback timer so it does not linger after navigation.
                clearTimeout(timer);
                this.logger.info('Navigation started');
                resolve();
                page.off('framenavigated', onStarted);
            };
            page.on('framenavigated', onStarted);
            timer = setTimeout(() => {
                page.off('framenavigated', onStarted);
                resolve();
            }, NAVIGATION_WAIT_TIMEOUT_MS);
        });
        this.logger.info('Navigation completed or timed out');
    }

    /**
     * @returns {Promise<{width: number, height: number}>} Viewport size of the active page.
     * @throws {Error} When the viewport or one of its dimensions is unavailable.
     */
    async getScreenRect() {
        const page = await this.getActivePage();
        const viewport = page.viewport();
        const width = viewport?.width;
        const height = viewport?.height;
        if (!width || !height) throw Error('Get screen context failed.');
        this.logger.debug('getScreenRect: w, h: ', `(${width} x ${height})`);
        return {
            width,
            height
        };
    }

    /**
     * Resolve the device scale factor once and cache it: first from the page
     * viewport, then from `window.devicePixelRatio`.
     * @throws {Error} When neither source yields a value.
     */
    async getDeviceScaleFactor() {
        if (this.deviceScaleFactor) return this.deviceScaleFactor;
        const page = await this.getActivePage();
        const scaleFactor = page.viewport()?.deviceScaleFactor;
        if (scaleFactor) {
            this.deviceScaleFactor = scaleFactor;
            this.logger.debug('getDeviceScaleFactor: deviceScaleFactor: ', scaleFactor);
            return scaleFactor;
        }
        const devicePixelRatio = await page.evaluate(() => window.devicePixelRatio);
        if (devicePixelRatio) {
            this.deviceScaleFactor = devicePixelRatio;
            this.logger.debug('getDeviceScaleFactor: devicePixelRatio: ', devicePixelRatio);
            return devicePixelRatio;
        }
        throw Error('Get deviceScaleFactor failed.');
    }

    /**
     * Find the page whose document is currently visible.
     * Pass 1 evaluates `document.visibilityState` on each page with a timeout,
     * pass 2 waits for visibility on each page, and the final fallback asks the
     * browser wrapper for its active page.
     */
    async getActivePage() {
        const pages = await this.browser.getBrowser().pages();
        this.logger.info(`get active pages len: ${pages.length}`);
        for (const page of pages) {
            let timer;
            try {
                const visibilityState = await Promise.race([
                    page.evaluate(() => document.visibilityState),
                    new Promise((_, reject) => {
                        timer = setTimeout(() => reject(new Error('Visibility check timed out after 3s')), VISIBILITY_CHECK_TIMEOUT_MS);
                    })
                ]);
                if ('visible' === visibilityState) {
                    this.logger.success('Active visible page retrieved successfully (direct check)');
                    return page;
                }
            } catch (evalError) {
                this.logger.warn('Warning: checking page visibility directly:', evalError);
            } finally {
                // Without this every call leaves one pending 3s timer per page behind.
                clearTimeout(timer);
            }
        }
        for (const page of pages) {
            try {
                const isVisible = await page.waitForFunction(() => 'visible' === document.visibilityState, {
                    timeout: VISIBILITY_CHECK_TIMEOUT_MS
                });
                if (isVisible) {
                    this.logger.success('Active visible page retrieved successfully');
                    return page;
                }
            } catch (waitError) {
                this.logger.warn(`Visibility check timed out for page: ${page.url()}`);
            }
        }
        this.logger.success('Active original page retrieved failed, fallback to active page');
        return this.browser.getActivePage();
    }

    /** Lazily create and cache the hotkey executor for the detected OS and browser. */
    async getHotkeyExecutor() {
        if (this.hotkeyExecutor) return this.hotkeyExecutor;
        const pptrBrowser = (await this.getActivePage()).browser();
        const envInfo = await (0, puppeteer_enhance_namespaceObject.getEnvInfo)(pptrBrowser);
        this.hotkeyExecutor = new puppeteer_enhance_namespaceObject.Hotkey({
            osName: envInfo.osName,
            browserName: envInfo.browserName
        });
        return this.hotkeyExecutor;
    }

    /** @returns {Promise<{url: string}>} URL of the active page, or '' when it cannot be read. */
    async getMeta() {
        try {
            const page = await this.getActivePage();
            return {
                url: page.url()
            };
        } catch (error) {
            this.logger.error('Failed to get page meta:', error);
        }
        return {
            url: ''
        };
    }

    /**
     * Convert a coordinate object into page coordinates. `normalized` values are
     * multiplied by the screen size; otherwise the `raw` values are returned as-is.
     * @param {{normalized?: {x: number, y: number}, raw?: {x: number, y: number}}} coords
     * @returns {Promise<{realX: number, realY: number}>}
     * @throws {Error} When neither `normalized` nor `raw` is present.
     */
    async calculateRealCoords(coords) {
        if (!coords.normalized) {
            if (!coords.raw) throw new Error('Invalide coordinates');
            return {
                realX: coords.raw.x,
                realY: coords.raw.y
            };
        }
        const screenContext = await this.getScreenContext();
        return {
            realX: coords.normalized.x * screenContext.screenWidth,
            realY: coords.normalized.y * screenContext.screenHeight
        };
    }
}

// CommonJS export tail (generated code, kept functionally unchanged).
exports.BrowserOperator = __webpack_exports__.BrowserOperator;
for (var __webpack_i__ in __webpack_exports__) {
    if (-1 === ["BrowserOperator"].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
}
Object.defineProperty(exports, '__esModule', {
    value: true
});
//# sourceMappingURL=browser-operator.js.map